-
Bug
-
Resolution: Not a Bug
-
Normal
-
None
-
4.14.z
-
None
-
Critical
-
Yes
-
Rejected
-
s390x
-
False
-
Description of problem:
Descheduler: Deployments fail under the "AffinityAndTaints, TopologyAndDuplicates, LifecycleAndUtilization, EvictPodsWithLocalStorage, EvictPodsWithPVC" Descheduler profiles due to a security privilege issue from 4.14.0-RC.4 onwards. 1. Install the KubeDescheduler operator. 2. Create a new KubeDescheduler instance using the AffinityAndTaints, TopologyAndDuplicates, LifecycleAndUtilization, EvictPodsWithLocalStorage and EvictPodsWithPVC profiles. 3. oc get KubeDescheduler -n openshift-kube-descheduler-operator -o yaml apiVersion: v1 items: - apiVersion: operator.openshift.io/v1 kind: KubeDescheduler metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"operator.openshift.io/v1","kind":"KubeDescheduler","metadata":{"annotations":{},"name":"cluster","namespace":"openshift-kube-descheduler-operator"},"spec":{"deschedulingIntervalSeconds":3600,"logLevel":"Normal","managementState":"Managed","mode":"Predictive","operatorLogLevel":"Normal","profileCustomizations":{"devLowNodeUtilizationThresholds":"Medium"},"profiles":["AffinityAndTaints","TopologyAndDuplicates","LifecycleAndUtilization","EvictPodsWithLocalStorage","EvictPodsWithPVC"]}} creationTimestamp: "2023-10-11T13:33:32Z" generation: 2 name: cluster namespace: openshift-kube-descheduler-operator resourceVersion: "227867" uid: 1b28414f-1d69-4439-a649-dcffb7eeb416 spec: deschedulingIntervalSeconds: 3600 logLevel: Normal managementState: Managed mode: Predictive observedConfig: servingInfo: cipherSuites: - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 minTLSVersion: VersionTLS12 operatorLogLevel: Normal profileCustomizations: devLowNodeUtilizationThresholds: Medium namespaces: {} profiles: - AffinityAndTaints - TopologyAndDuplicates - LifecycleAndUtilization - 
EvictPodsWithLocalStorage - EvictPodsWithPVC unsupportedConfigOverrides: null status: conditions: - lastTransitionTime: "2023-10-11T13:33:32Z" status: "False" type: ResourceSyncControllerDegraded - lastTransitionTime: "2023-10-11T13:33:35Z" status: "False" type: ConfigObservationDegraded - lastTransitionTime: "2023-10-11T13:33:37Z" status: "False" type: TargetConfigControllerDegraded generations: - group: apps hash: "" lastGeneration: 2 name: descheduler namespace: openshift-kube-descheduler-operator resource: deployments readyReplicas: 0 kind: List metadata: resourceVersion: "" [root@bastion ~]# oc get sub -n openshift-kube-descheduler-operator NAME PACKAGE SOURCE CHANNEL cluster-kube-descheduler-operator cluster-kube-descheduler-operator redhat-operators-stage stable [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator No resources found in openshift-kube-descheduler-operator namespace. [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator NAME DISPLAY VERSION REPLACES PHASE clusterkubedescheduleroperator.4.14.0-202307211703 Kube Descheduler Operator 4.14.0-202307211703 Pending [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator NAME DISPLAY VERSION REPLACES PHASE clusterkubedescheduleroperator.4.14.0-202307211703 Kube Descheduler Operator 4.14.0-202307211703 Installing [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator NAME DISPLAY VERSION REPLACES PHASE clusterkubedescheduleroperator.4.14.0-202307211703 Kube Descheduler Operator 4.14.0-202307211703 Succeeded [root@bastion ~]# oc get all -n openshift-kube-descheduler-operator Warning: apps.openshift.io/v1 DeploymentConfig is deprecated in v4.14+, unavailable in v4.10000+ NAME READY STATUS RESTARTS AGE pod/descheduler-operator-68b86cb754-wshx8 1/1 Running 0 16s NAME READY UP-TO-DATE AVAILABLE AGE deployment.apps/descheduler-operator 1/1 1 1 16s NAME DESIRED CURRENT READY AGE replicaset.apps/descheduler-operator-68b86cb754 1 1 1 16s 4.clone the 
deschedular tests 5.Execute all the tests [root@bastion descheduler-tests]# cat /root/e2e-results/descheduler/e2eTests-13_10_2023_0844.txt | grep FAIL --- FAIL: TestRemoveDuplicates (131.53s) --- FAIL: TestRemoveDuplicates/Evict_Pod_even_Pods_schedule_to_specific_node (56.13s) --- FAIL: TestRemoveDuplicates/Evict_Pod_even_Pods_with_local_storage (75.16s) --- FAIL: TestFailedPods (121.88s) --- FAIL: TestFailedPods/test-failed-pods-default-args (31.18s) --- FAIL: TestFailedPods/test-failed-pods-reason-unmatched (30.14s) --- FAIL: TestFailedPods/test-failed-pods-min-age-unmet (30.12s) --- FAIL: TestFailedPods/test-failed-pods-exclude-job-kind (30.10s) --- FAIL: TestLeaderElection (61.32s) --- FAIL: TestNamespaceConstraintsInclude (7.30s) --- FAIL: TestNamespaceConstraintsExclude (7.28s) --- FAIL: TestEvictSystemCriticalPriority (9.39s) --- FAIL: TestEvictSystemCriticalPriorityClass (9.36s) --- FAIL: TestThresholdPriority (8.34s) --- FAIL: TestThresholdPriorityClass (7.35s) --- FAIL: TestPodLabelSelector (8.33s) --- FAIL: TestEvictAnnotation (37.32s) --- FAIL: TestPodLifeTimeOldestEvicted (302.28s) --- FAIL: TestTooManyRestarts (6.26s) --- FAIL: TestTopologySpreadConstraint (603.36s) --- FAIL: TestTopologySpreadConstraint/test-rc-topology-spread-hard-constraint (302.06s) --- FAIL: TestTopologySpreadConstraint/test-rc-topology-spread-soft-constraint (301.06s) FAIL FAIL sigs.k8s.io/descheduler/test/e2e 1666.362s FAIL 6. 
oc logs pod/descheduler-6665f74ff-54j25 I1013 12:44:34.037226 1 dynamic_serving_content.go:113] "Loaded a new cert/key pair" name="serving-cert::/certs-dir/tls.crt::/certs-dir/tls.key" I1013 12:44:34.770676 1 dynamic_serving_content.go:132] "Starting controller" name="serving-cert::/certs-dir/tls.crt::/certs-dir/tls.key" I1013 12:44:34.770940 1 tlsconfig.go:200] "Loaded serving cert" certName="serving-cert::/certs-dir/tls.crt::/certs-dir/tls.key" certDetail="\"metrics.openshift-kube-descheduler-operator.svc\" [serving] validServingFor=[metrics.openshift-kube-descheduler-operator.svc,metrics.openshift-kube-descheduler-operator.svc.cluster.local] issuer=\"openshift-service-serving-signer@1697176862\" (2023-10-13 12:44:26 +0000 UTC to 2025-10-12 12:44:27 +0000 UTC (now=2023-10-13 12:44:34.770898238 +0000 UTC))" I1013 12:44:34.771416 1 named_certificates.go:53] "Loaded SNI cert" index=0 certName="self-signed loopback" certDetail="\"apiserver-loopback-client@1697201074\" [serving] validServingFor=[apiserver-loopback-client] issuer=\"apiserver-loopback-client-ca@1697201074\" (2023-10-13 11:44:34 +0000 UTC to 2024-10-12 11:44:34 +0000 UTC (now=2023-10-13 12:44:34.77139138 +0000 UTC))" I1013 12:44:34.771461 1 secure_serving.go:210] Serving securely on [::]:10258 I1013 12:44:34.771593 1 tlsconfig.go:240] "Starting DynamicServingCertificateController" W1013 12:44:34.793799 I1013 12:44:35.135538 1 lownodeutilization.go:134] "Criteria for a node under utilization" CPU=20 Mem=20 Pods=20 I1013 12:44:35.135548 1 lownodeutilization.go:135] "Number of underutilized nodes" totalNumber=5 I1013 12:44:35.135558 1 lownodeutilization.go:148] "Criteria for a node above target utilization" CPU=50 Mem=50 Pods=50 I1013 12:44:35.135567 1 lownodeutilization.go:149] "Number of overutilized nodes" totalNumber=0 I1013 12:44:35.135579 1 lownodeutilization.go:167] "All nodes are under target utilization, nothing to do here" I1013 12:44:35.135588 1 profile.go:345] "Total number of pods evicted" 
extension point="Balance" evictedPods=0 I1013 12:44:35.136368 1 profile.go:345] "Total number of pods evicted" extension point="Balance" evictedPods=0 I1013 12:44:35.136386 1 descheduler.go:163] "Number of evicted pods" totalEvicted=0
Version-Release number of selected component (if applicable):
4.14.0-RC.4
How reproducible:
Every time
Steps to Reproduce:
1.install KubeDescheduler operator 2.Create a new instance for KubeDescheduler and using TopologyAndDuplicates, AffinityAndTaints TopologyAndDuplicates LifecycleAndUtilization EvictPodsWithLocalStorage EvictPodsWithPVC profiles 3.oc get KubeDescheduler -n openshift-kube-descheduler-operator -o yaml apiVersion: v1 items: - apiVersion: operator.openshift.io/v1 kind: KubeDescheduler metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"operator.openshift.io/v1","kind":"KubeDescheduler","metadata":{"annotations":{},"name":"cluster","namespace":"openshift-kube-descheduler-operator"},"spec":{"deschedulingIntervalSeconds":3600,"logLevel":"Normal","managementState":"Managed","mode":"Predictive","operatorLogLevel":"Normal","profileCustomizations":{"devLowNodeUtilizationThresholds":"Medium"},"profiles":["AffinityAndTaints","TopologyAndDuplicates","LifecycleAndUtilization","EvictPodsWithLocalStorage","EvictPodsWithPVC"]}} creationTimestamp: "2023-10-11T13:33:32Z" generation: 2 name: cluster namespace: openshift-kube-descheduler-operator resourceVersion: "227867" uid: 1b28414f-1d69-4439-a649-dcffb7eeb416 spec: deschedulingIntervalSeconds: 3600 logLevel: Normal managementState: Managed mode: Predictive observedConfig: servingInfo: cipherSuites: - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 minTLSVersion: VersionTLS12 operatorLogLevel: Normal profileCustomizations: devLowNodeUtilizationThresholds: Medium namespaces: {} profiles: - AffinityAndTaints - TopologyAndDuplicates - LifecycleAndUtilization - EvictPodsWithLocalStorage - EvictPodsWithPVC unsupportedConfigOverrides: null status: conditions: - lastTransitionTime: "2023-10-11T13:33:32Z" status: "False" type: ResourceSyncControllerDegraded - lastTransitionTime: "2023-10-11T13:33:35Z" 
status: "False" type: ConfigObservationDegraded - lastTransitionTime: "2023-10-11T13:33:37Z" status: "False" type: TargetConfigControllerDegraded generations: - group: apps hash: "" lastGeneration: 2 name: descheduler namespace: openshift-kube-descheduler-operator resource: deployments readyReplicas: 0 kind: List metadata: resourceVersion: "" [root@bastion ~]# oc get sub -n openshift-kube-descheduler-operator NAME PACKAGE SOURCE CHANNEL cluster-kube-descheduler-operator cluster-kube-descheduler-operator redhat-operators-stage stable [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator No resources found in openshift-kube-descheduler-operator namespace. [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator NAME DISPLAY VERSION REPLACES PHASE clusterkubedescheduleroperator.4.14.0-202307211703 Kube Descheduler Operator 4.14.0-202307211703 Pending [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator NAME DISPLAY VERSION REPLACES PHASE clusterkubedescheduleroperator.4.14.0-202307211703 Kube Descheduler Operator 4.14.0-202307211703 Installing [root@bastion ~]# oc get csv -n openshift-kube-descheduler-operator NAME DISPLAY VERSION REPLACES PHASE clusterkubedescheduleroperator.4.14.0-202307211703 Kube Descheduler Operator 4.14.0-202307211703 Succeeded [root@bastion ~]# oc get all -n openshift-kube-descheduler-operator Warning: apps.openshift.io/v1 DeploymentConfig is deprecated in v4.14+, unavailable in v4.10000+ NAME READY STATUS RESTARTS AGE pod/descheduler-operator-68b86cb754-wshx8 1/1 Running 0 16s NAME READY UP-TO-DATE AVAILABLE AGE deployment.apps/descheduler-operator 1/1 1 1 16s NAME DESIRED CURRENT READY AGE replicaset.apps/descheduler-operator-68b86cb754 1 1 1 16s 4.clone the deschedular tests 5.Execute all the tests [root@bastion descheduler-tests]# cat /root/e2e-results/descheduler/e2eTests-13_10_2023_0844.txt | grep FAIL --- FAIL: TestRemoveDuplicates (131.53s) --- FAIL: 
TestRemoveDuplicates/Evict_Pod_even_Pods_schedule_to_specific_node (56.13s) --- FAIL: TestRemoveDuplicates/Evict_Pod_even_Pods_with_local_storage (75.16s) --- FAIL: TestFailedPods (121.88s) --- FAIL: TestFailedPods/test-failed-pods-default-args (31.18s) --- FAIL: TestFailedPods/test-failed-pods-reason-unmatched (30.14s) --- FAIL: TestFailedPods/test-failed-pods-min-age-unmet (30.12s) --- FAIL: TestFailedPods/test-failed-pods-exclude-job-kind (30.10s) --- FAIL: TestLeaderElection (61.32s) --- FAIL: TestNamespaceConstraintsInclude (7.30s) --- FAIL: TestNamespaceConstraintsExclude (7.28s) --- FAIL: TestEvictSystemCriticalPriority (9.39s) --- FAIL: TestEvictSystemCriticalPriorityClass (9.36s) --- FAIL: TestThresholdPriority (8.34s) --- FAIL: TestThresholdPriorityClass (7.35s) --- FAIL: TestPodLabelSelector (8.33s) --- FAIL: TestEvictAnnotation (37.32s) --- FAIL: TestPodLifeTimeOldestEvicted (302.28s) --- FAIL: TestTooManyRestarts (6.26s) --- FAIL: TestTopologySpreadConstraint (603.36s) --- FAIL: TestTopologySpreadConstraint/test-rc-topology-spread-hard-constraint (302.06s) --- FAIL: TestTopologySpreadConstraint/test-rc-topology-spread-soft-constraint (301.06s) FAIL FAIL sigs.k8s.io/descheduler/test/e2e 1666.362s FAIL 6. 
oc logs pod/descheduler-6665f74ff-54j25 I1013 12:44:34.037226 1 dynamic_serving_content.go:113] "Loaded a new cert/key pair" name="serving-cert::/certs-dir/tls.crt::/certs-dir/tls.key" I1013 12:44:34.770676 1 dynamic_serving_content.go:132] "Starting controller" name="serving-cert::/certs-dir/tls.crt::/certs-dir/tls.key" I1013 12:44:34.770940 1 tlsconfig.go:200] "Loaded serving cert" certName="serving-cert::/certs-dir/tls.crt::/certs-dir/tls.key" certDetail="\"metrics.openshift-kube-descheduler-operator.svc\" [serving] validServingFor=[metrics.openshift-kube-descheduler-operator.svc,metrics.openshift-kube-descheduler-operator.svc.cluster.local] issuer=\"openshift-service-serving-signer@1697176862\" (2023-10-13 12:44:26 +0000 UTC to 2025-10-12 12:44:27 +0000 UTC (now=2023-10-13 12:44:34.770898238 +0000 UTC))" I1013 12:44:34.771416 1 named_certificates.go:53] "Loaded SNI cert" index=0 certName="self-signed loopback" certDetail="\"apiserver-loopback-client@1697201074\" [serving] validServingFor=[apiserver-loopback-client] issuer=\"apiserver-loopback-client-ca@1697201074\" (2023-10-13 11:44:34 +0000 UTC to 2024-10-12 11:44:34 +0000 UTC (now=2023-10-13 12:44:34.77139138 +0000 UTC))" I1013 12:44:34.771461 1 secure_serving.go:210] Serving securely on [::]:10258 I1013 12:44:34.771593 1 tlsconfig.go:240] "Starting DynamicServingCertificateController" W1013 12:44:34.793799 1 descheduler.go:232] failed to convert Descheduler minor version to float I1013 12:44:35.132125 1 profile.go:316] "Total number of pods evicted" extension point="Deschedule" evictedPods=0 I1013 12:44:35.132136 1 node_affinity.go:80] "Executing for nodeAffinityType" nodeAffinity="requiredDuringSchedulingIgnoredDuringExecution" I1013 12:44:35.132929 1 profile.go:316] "Total number of pods evicted" extension point="Deschedule" evictedPods=0 I1013 12:44:35.133840 1 profile.go:316] "Total number of pods evicted" extension point="Deschedule" evictedPods=0 I1013 12:44:35.133857 1 topologyspreadconstraint.go:106] 
Processing namespaces for topology spread constraints I1013 12:44:35.134654 1 profile.go:345] "Total number of pods evicted" extension point="Balance" evictedPods=0 I1013 12:44:35.136368 1 profile.go:345] "Total number of pods evicted" extension point="Balance" evictedPods=0 I1013 12:44:35.136386 1 descheduler.go:163] "Number of evicted pods" totalEvicted=0
Actual results:
The descheduler tests passed as expected up to and including 4.14.0-RC.3; the failures listed above occur from 4.14.0-RC.4 onwards.
Expected results:
Additional info: