-
Bug
-
Resolution: Done
-
Major
-
1.25.0, 1.29.0
-
None
-
None
-
False
-
-
False
-
Webhook HPA settings are overridden by the operator.
-
Known Issue
-
Done
Running KnativeServing with the default highAvailability setting (2 replicas by default) under a heavier load of creating ~300 ksvcs at the same time, I see the webhook HorizontalPodAutoscaler attempt to scale up the webhook replicas, but any new pods are immediately deleted:
webhook-69cff6d4b9-2dhgd 2/2 Running 0 16h
webhook-69cff6d4b9-7gj9b 2/2 Running 0 15h
webhook-69cff6d4b9-fg8vj 1/2 Terminating 0 22s
webhook-69cff6d4b9-l9rt8 2/2 Terminating 0 7s
webhook-69cff6d4b9-sdnwr 2/2 Terminating 0 7s
I suspect the operator is overriding the `replicas` spec that was updated by the HPA.
Notice that the manifestival update, which includes the "replicas" field, happens one second after the kube-controller-manager update:
apiVersion: apps/v1 kind: Deployment metadata: annotations: deployment.kubernetes.io/revision: "1" kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"apps/v1","kind":"Deployment","metadata":{"creationTimestamp":null,"labels":{"app.kubernetes.io/component":"webhook","app.kubernetes.io/name":"knative-serving","app.kubernetes.io/version":"1.4.0"},"name":"webhook","namespace":"knative-serving","ownerReferences":[{"apiVersion":"operator.knative.dev/v1alpha1","blockOwnerDeletion":true,"controller":true,"kind":"KnativeServing","name":"knative-serving","uid":"bf5961c1-8747-4332-b50b-749c6fed77fc"}]},"spec":{"replicas":2,"selector":{"matchLabels":{"app":"webhook","role":"webhook"}},"strategy":{},"template":{"metadata":{"annotations":{"cluster-autoscaler.kubernetes.io/safe-to-evict":"false"},"creationTimestamp":null,"labels":{"app":"webhook","app.kubernetes.io/name":"knative-serving","app.kubernetes.io/version":"1.4.0","role":"webhook"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app":"webhook"}},"topologyKey":"kubernetes.io/hostname"},"weight":100}]}},"containers":[{"env":[{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"SYSTEM_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"CONFIG_LOGGING_NAME","value":"config-logging"},{"name":"CONFIG_OBSERVABILITY_NAME","value":"config-observability"},{"name":"WEBHOOK_NAME","value":"webhook"},{"name":"WEBHOOK_PORT","value":"8443"},{"name":"METRICS_DOMAIN","value":"knative.dev/internal/serving"},{"name":"METRICS_PROMETHEUS_HOST","value":"127.0.0.1"}],"image":"registry.ci.openshift.org/openshift/knative-v1.4.0:knative-serving-webhook","livenessProbe":{"failureThreshold":6,"httpGet":{"httpHeaders":[{"name":"k-kubelet-probe","value":"webhook"}],"port":8443,"scheme":"HTTPS"},"initialDelaySeconds":20,"periodSeconds":1},"name":"webhook","ports":[{"containerPort":90
90,"name":"metrics"},{"containerPort":8008,"name":"profiling"},{"containerPort":8443,"name":"https-webhook"}],"readinessProbe":{"httpGet":{"httpHeaders":[{"name":"k-kubelet-probe","value":"webhook"}],"port":8443,"scheme":"HTTPS"},"periodSeconds":1},"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"100Mi"}},"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["all"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true}},{"args":["--secure-listen-address=0.0.0.0:8444","--upstream=http://127.0.0.1:9090/","--tls-cert-file=/etc/tls/private/tls.crt","--tls-private-key-file=/etc/tls/private/tls.key","--logtostderr=true","--v=10"],"image":"registry.ci.openshift.org/origin/4.7:kube-rbac-proxy","name":"kube-rbac-proxy","resources":{"requests":{"cpu":"10m","memory":"20Mi"}},"volumeMounts":[{"mountPath":"/etc/tls/private","name":"secret-webhook-sm-service-tls"}]}],"serviceAccountName":"controller","terminationGracePeriodSeconds":300,"volumes":[{"name":"secret-webhook-sm-service-tls","secret":{"secretName":"webhook-sm-service-tls"}}]}}},"status":{}} creationTimestamp: "2022-08-31T21:50:33Z" generation: 1804 labels: app.kubernetes.io/component: webhook app.kubernetes.io/name: knative-serving app.kubernetes.io/version: 1.4.0 managedFields: - apiVersion: apps/v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: f:deployment.kubernetes.io/revision: {} f:status: f:availableReplicas: {} f:conditions: .: {} k:{"type":"Available"}: .: {} f:lastTransitionTime: {} f:lastUpdateTime: {} f:message: {} f:reason: {} f:status: {} f:type: {} k:{"type":"Progressing"}: .: {} f:lastTransitionTime: {} f:lastUpdateTime: {} f:message: {} f:reason: {} f:status: {} f:type: {} f:observedGeneration: {} f:readyReplicas: {} f:replicas: {} f:updatedReplicas: {} manager: kube-controller-manager operation: Update subresource: status time: "2022-09-02T09:52:17Z" - apiVersion: apps/v1 fieldsType: FieldsV1 fieldsV1: f:metadata: f:annotations: .: 
{} f:kubectl.kubernetes.io/last-applied-configuration: {} f:labels: .: {} f:app.kubernetes.io/component: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/version: {} f:ownerReferences: .: {} k:{"uid":"bf5961c1-8747-4332-b50b-749c6fed77fc"}: {} f:spec: f:progressDeadlineSeconds: {} f:replicas: {} f:revisionHistoryLimit: {} f:selector: {} f:strategy: f:rollingUpdate: .: {} f:maxSurge: {} f:maxUnavailable: {} f:type: {} f:template: f:metadata: f:annotations: .: {} f:cluster-autoscaler.kubernetes.io/safe-to-evict: {} f:labels: .: {} f:app: {} f:app.kubernetes.io/name: {} f:app.kubernetes.io/version: {} f:role: {} f:spec: f:affinity: .: {} f:podAntiAffinity: .: {} f:preferredDuringSchedulingIgnoredDuringExecution: {} f:containers: k:{"name":"kube-rbac-proxy"}: .: {} f:args: {} f:image: {} f:imagePullPolicy: {} f:name: {} f:resources: .: {} f:requests: .: {} f:cpu: {} f:memory: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:volumeMounts: .: {} k:{"mountPath":"/etc/tls/private"}: .: {} f:mountPath: {} f:name: {} k:{"name":"webhook"}: .: {} f:env: .: {} k:{"name":"CONFIG_LOGGING_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"CONFIG_OBSERVABILITY_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"METRICS_DOMAIN"}: .: {} f:name: {} f:value: {} k:{"name":"METRICS_PROMETHEUS_HOST"}: .: {} f:name: {} f:value: {} k:{"name":"POD_NAME"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"SYSTEM_NAMESPACE"}: .: {} f:name: {} f:valueFrom: .: {} f:fieldRef: {} k:{"name":"WEBHOOK_NAME"}: .: {} f:name: {} f:value: {} k:{"name":"WEBHOOK_PORT"}: .: {} f:name: {} f:value: {} f:image: {} f:imagePullPolicy: {} f:livenessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:httpHeaders: {} f:path: {} f:port: {} f:scheme: {} f:initialDelaySeconds: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:name: {} f:ports: .: {} k:{"containerPort":8008,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} 
k:{"containerPort":8443,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} k:{"containerPort":9090,"protocol":"TCP"}: .: {} f:containerPort: {} f:name: {} f:protocol: {} f:readinessProbe: .: {} f:failureThreshold: {} f:httpGet: .: {} f:httpHeaders: {} f:path: {} f:port: {} f:scheme: {} f:periodSeconds: {} f:successThreshold: {} f:timeoutSeconds: {} f:resources: .: {} f:limits: .: {} f:cpu: {} f:memory: {} f:requests: .: {} f:cpu: {} f:memory: {} f:securityContext: .: {} f:allowPrivilegeEscalation: {} f:capabilities: .: {} f:drop: {} f:readOnlyRootFilesystem: {} f:runAsNonRoot: {} f:terminationMessagePath: {} f:terminationMessagePolicy: {} f:dnsPolicy: {} f:restartPolicy: {} f:schedulerName: {} f:securityContext: {} f:serviceAccount: {} f:serviceAccountName: {} f:terminationGracePeriodSeconds: {} f:volumes: .: {} k:{"name":"secret-webhook-sm-service-tls"}: .: {} f:name: {} f:secret: .: {} f:defaultMode: {} f:secretName: {} manager: manifestival operation: Update time: "2022-09-02T09:52:18Z" name: webhook namespace: knative-serving ownerReferences: - apiVersion: operator.knative.dev/v1alpha1 blockOwnerDeletion: true controller: true kind: KnativeServing name: knative-serving uid: bf5961c1-8747-4332-b50b-749c6fed77fc resourceVersion: "8571844" uid: 9948ceea-8e7c-445d-baea-af62c2055f51 spec: progressDeadlineSeconds: 600 replicas: 2 revisionHistoryLimit: 10 selector: matchLabels: app: webhook role: webhook strategy: rollingUpdate: maxSurge: 25% maxUnavailable: 25% type: RollingUpdate template: metadata: annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "false" creationTimestamp: null labels: app: webhook app.kubernetes.io/name: knative-serving app.kubernetes.io/version: 1.4.0 role: webhook spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: matchLabels: app: webhook topologyKey: kubernetes.io/hostname weight: 100 containers: - env: - name: POD_NAME valueFrom: fieldRef: 
apiVersion: v1 fieldPath: metadata.name - name: SYSTEM_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: CONFIG_LOGGING_NAME value: config-logging - name: CONFIG_OBSERVABILITY_NAME value: config-observability - name: WEBHOOK_NAME value: webhook - name: WEBHOOK_PORT value: "8443" - name: METRICS_DOMAIN value: knative.dev/internal/serving - name: METRICS_PROMETHEUS_HOST value: 127.0.0.1 image: registry.ci.openshift.org/openshift/knative-v1.4.0:knative-serving-webhook imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 6 httpGet: httpHeaders: - name: k-kubelet-probe value: webhook path: / port: 8443 scheme: HTTPS initialDelaySeconds: 20 periodSeconds: 1 successThreshold: 1 timeoutSeconds: 1 name: webhook ports: - containerPort: 9090 name: metrics protocol: TCP - containerPort: 8008 name: profiling protocol: TCP - containerPort: 8443 name: https-webhook protocol: TCP readinessProbe: failureThreshold: 3 httpGet: httpHeaders: - name: k-kubelet-probe value: webhook path: / port: 8443 scheme: HTTPS periodSeconds: 1 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: 500m memory: 1Gi requests: cpu: 100m memory: 100Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - all readOnlyRootFilesystem: true runAsNonRoot: true terminationMessagePath: /dev/termination-log terminationMessagePolicy: File - args: - --secure-listen-address=0.0.0.0:8444 - --upstream=http://127.0.0.1:9090/ - --tls-cert-file=/etc/tls/private/tls.crt - --tls-private-key-file=/etc/tls/private/tls.key - --logtostderr=true - --v=10 image: registry.ci.openshift.org/origin/4.7:kube-rbac-proxy imagePullPolicy: IfNotPresent name: kube-rbac-proxy resources: requests: cpu: 10m memory: 20Mi terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /etc/tls/private name: secret-webhook-sm-service-tls dnsPolicy: ClusterFirst restartPolicy: Always schedulerName: default-scheduler securityContext: {} 
serviceAccount: controller serviceAccountName: controller terminationGracePeriodSeconds: 300 volumes: - name: secret-webhook-sm-service-tls secret: defaultMode: 420 secretName: webhook-sm-service-tls status: availableReplicas: 2 conditions: - lastTransitionTime: "2022-08-31T21:50:33Z" lastUpdateTime: "2022-08-31T21:51:07Z" message: ReplicaSet "webhook-69cff6d4b9" has successfully progressed. reason: NewReplicaSetAvailable status: "True" type: Progressing - lastTransitionTime: "2022-09-02T09:52:17Z" lastUpdateTime: "2022-09-02T09:52:17Z" message: Deployment has minimum availability. reason: MinimumReplicasAvailable status: "True" type: Available observedGeneration: 1804 readyReplicas: 2 replicas: 2 updatedReplicas: 2
- relates to
-
SRVKS-1113 setting KnativeServing .spec.workloads[].replicas does work for HPA-enabled components (activator and webhook)
-
- Closed
-