-
Bug
-
Resolution: Done
-
Major
-
1.25.0, 1.29.0
-
None
-
None
-
False
-
-
False
-
Webhook HPA settings are overridden by the operator.
-
Known Issue
-
Done
Running KnativeServing with the default highAvailability setting (2 replicas) under a heavier load of creating ~300 ksvcs at the same time, I see the webhook HorizontalPodAutoscaler attempt to scale up the webhook replicas, but any new pods are immediately deleted:
webhook-69cff6d4b9-2dhgd 2/2 Running 0 16h
webhook-69cff6d4b9-7gj9b 2/2 Running 0 15h
webhook-69cff6d4b9-fg8vj 1/2 Terminating 0 22s
webhook-69cff6d4b9-l9rt8 2/2 Terminating 0 7s
webhook-69cff6d4b9-sdnwr 2/2 Terminating 0 7s
I suspect the operator is overriding the `replicas` spec that was updated by the HPA.
Notice in the Deployment's managedFields below that the manifestival update, which includes `f:spec.f:replicas`, happens one second after the kube-controller-manager update:
apiVersion: apps/v1
kind: Deployment
metadata:
annotations:
deployment.kubernetes.io/revision: "1"
kubectl.kubernetes.io/last-applied-configuration: |
{"apiVersion":"apps/v1","kind":"Deployment","metadata":{"creationTimestamp":null,"labels":{"app.kubernetes.io/component":"webhook","app.kubernetes.io/name":"knative-serving","app.kubernetes.io/version":"1.4.0"},"name":"webhook","namespace":"knative-serving","ownerReferences":[{"apiVersion":"operator.knative.dev/v1alpha1","blockOwnerDeletion":true,"controller":true,"kind":"KnativeServing","name":"knative-serving","uid":"bf5961c1-8747-4332-b50b-749c6fed77fc"}]},"spec":{"replicas":2,"selector":{"matchLabels":{"app":"webhook","role":"webhook"}},"strategy":{},"template":{"metadata":{"annotations":{"cluster-autoscaler.kubernetes.io/safe-to-evict":"false"},"creationTimestamp":null,"labels":{"app":"webhook","app.kubernetes.io/name":"knative-serving","app.kubernetes.io/version":"1.4.0","role":"webhook"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app":"webhook"}},"topologyKey":"kubernetes.io/hostname"},"weight":100}]}},"containers":[{"env":[{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"SYSTEM_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"CONFIG_LOGGING_NAME","value":"config-logging"},{"name":"CONFIG_OBSERVABILITY_NAME","value":"config-observability"},{"name":"WEBHOOK_NAME","value":"webhook"},{"name":"WEBHOOK_PORT","value":"8443"},{"name":"METRICS_DOMAIN","value":"knative.dev/internal/serving"},{"name":"METRICS_PROMETHEUS_HOST","value":"127.0.0.1"}],"image":"registry.ci.openshift.org/openshift/knative-v1.4.0:knative-serving-webhook","livenessProbe":{"failureThreshold":6,"httpGet":{"httpHeaders":[{"name":"k-kubelet-probe","value":"webhook"}],"port":8443,"scheme":"HTTPS"},"initialDelaySeconds":20,"periodSeconds":1},"name":"webhook","ports":[{"containerPort":9090,"name":"metrics"},{"containerPort":8008,"name":"profiling"},{"containerPort":8443,"name":"https-webhook"}],"readinessProbe":{"httpGet":{"httpHeaders":[{"name":"k-kubelet-probe","value":"webhook"}],"port":8443,"scheme":"HTTPS"},"periodSeconds":1},"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"100Mi"}},"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["all"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true}},{"args":["--secure-listen-address=0.0.0.0:8444","--upstream=http://127.0.0.1:9090/","--tls-cert-file=/etc/tls/private/tls.crt","--tls-private-key-file=/etc/tls/private/tls.key","--logtostderr=true","--v=10"],"image":"registry.ci.openshift.org/origin/4.7:kube-rbac-proxy","name":"kube-rbac-proxy","resources":{"requests":{"cpu":"10m","memory":"20Mi"}},"volumeMounts":[{"mountPath":"/etc/tls/private","name":"secret-webhook-sm-service-tls"}]}],"serviceAccountName":"controller","terminationGracePeriodSeconds":300,"volumes":[{"name":"secret-webhook-sm-service-tls","secret":{"secretName":"webhook-sm-service-tls"}}]}}},"status":{}}
creationTimestamp: "2022-08-31T21:50:33Z"
generation: 1804
labels:
app.kubernetes.io/component: webhook
app.kubernetes.io/name: knative-serving
app.kubernetes.io/version: 1.4.0
managedFields:
- apiVersion: apps/v1
fieldsType: FieldsV1
fieldsV1:
f:metadata:
f:annotations:
f:deployment.kubernetes.io/revision: {}
f:status:
f:availableReplicas: {}
f:conditions:
.: {}
k:{"type":"Available"}:
.: {}
f:lastTransitionTime: {}
f:lastUpdateTime: {}
f:message: {}
f:reason: {}
f:status: {}
f:type: {}
k:{"type":"Progressing"}:
.: {}
f:lastTransitionTime: {}
f:lastUpdateTime: {}
f:message: {}
f:reason: {}
f:status: {}
f:type: {}
f:observedGeneration: {}
f:readyReplicas: {}
f:replicas: {}
f:updatedReplicas: {}
manager: kube-controller-manager
operation: Update
subresource: status
time: "2022-09-02T09:52:17Z"
- apiVersion: apps/v1
fieldsType: FieldsV1
fieldsV1:
f:metadata:
f:annotations:
.: {}
f:kubectl.kubernetes.io/last-applied-configuration: {}
f:labels:
.: {}
f:app.kubernetes.io/component: {}
f:app.kubernetes.io/name: {}
f:app.kubernetes.io/version: {}
f:ownerReferences:
.: {}
k:{"uid":"bf5961c1-8747-4332-b50b-749c6fed77fc"}: {}
f:spec:
f:progressDeadlineSeconds: {}
f:replicas: {}
f:revisionHistoryLimit: {}
f:selector: {}
f:strategy:
f:rollingUpdate:
.: {}
f:maxSurge: {}
f:maxUnavailable: {}
f:type: {}
f:template:
f:metadata:
f:annotations:
.: {}
f:cluster-autoscaler.kubernetes.io/safe-to-evict: {}
f:labels:
.: {}
f:app: {}
f:app.kubernetes.io/name: {}
f:app.kubernetes.io/version: {}
f:role: {}
f:spec:
f:affinity:
.: {}
f:podAntiAffinity:
.: {}
f:preferredDuringSchedulingIgnoredDuringExecution: {}
f:containers:
k:{"name":"kube-rbac-proxy"}:
.: {}
f:args: {}
f:image: {}
f:imagePullPolicy: {}
f:name: {}
f:resources:
.: {}
f:requests:
.: {}
f:cpu: {}
f:memory: {}
f:terminationMessagePath: {}
f:terminationMessagePolicy: {}
f:volumeMounts:
.: {}
k:{"mountPath":"/etc/tls/private"}:
.: {}
f:mountPath: {}
f:name: {}
k:{"name":"webhook"}:
.: {}
f:env:
.: {}
k:{"name":"CONFIG_LOGGING_NAME"}:
.: {}
f:name: {}
f:value: {}
k:{"name":"CONFIG_OBSERVABILITY_NAME"}:
.: {}
f:name: {}
f:value: {}
k:{"name":"METRICS_DOMAIN"}:
.: {}
f:name: {}
f:value: {}
k:{"name":"METRICS_PROMETHEUS_HOST"}:
.: {}
f:name: {}
f:value: {}
k:{"name":"POD_NAME"}:
.: {}
f:name: {}
f:valueFrom:
.: {}
f:fieldRef: {}
k:{"name":"SYSTEM_NAMESPACE"}:
.: {}
f:name: {}
f:valueFrom:
.: {}
f:fieldRef: {}
k:{"name":"WEBHOOK_NAME"}:
.: {}
f:name: {}
f:value: {}
k:{"name":"WEBHOOK_PORT"}:
.: {}
f:name: {}
f:value: {}
f:image: {}
f:imagePullPolicy: {}
f:livenessProbe:
.: {}
f:failureThreshold: {}
f:httpGet:
.: {}
f:httpHeaders: {}
f:path: {}
f:port: {}
f:scheme: {}
f:initialDelaySeconds: {}
f:periodSeconds: {}
f:successThreshold: {}
f:timeoutSeconds: {}
f:name: {}
f:ports:
.: {}
k:{"containerPort":8008,"protocol":"TCP"}:
.: {}
f:containerPort: {}
f:name: {}
f:protocol: {}
k:{"containerPort":8443,"protocol":"TCP"}:
.: {}
f:containerPort: {}
f:name: {}
f:protocol: {}
k:{"containerPort":9090,"protocol":"TCP"}:
.: {}
f:containerPort: {}
f:name: {}
f:protocol: {}
f:readinessProbe:
.: {}
f:failureThreshold: {}
f:httpGet:
.: {}
f:httpHeaders: {}
f:path: {}
f:port: {}
f:scheme: {}
f:periodSeconds: {}
f:successThreshold: {}
f:timeoutSeconds: {}
f:resources:
.: {}
f:limits:
.: {}
f:cpu: {}
f:memory: {}
f:requests:
.: {}
f:cpu: {}
f:memory: {}
f:securityContext:
.: {}
f:allowPrivilegeEscalation: {}
f:capabilities:
.: {}
f:drop: {}
f:readOnlyRootFilesystem: {}
f:runAsNonRoot: {}
f:terminationMessagePath: {}
f:terminationMessagePolicy: {}
f:dnsPolicy: {}
f:restartPolicy: {}
f:schedulerName: {}
f:securityContext: {}
f:serviceAccount: {}
f:serviceAccountName: {}
f:terminationGracePeriodSeconds: {}
f:volumes:
.: {}
k:{"name":"secret-webhook-sm-service-tls"}:
.: {}
f:name: {}
f:secret:
.: {}
f:defaultMode: {}
f:secretName: {}
manager: manifestival
operation: Update
time: "2022-09-02T09:52:18Z"
name: webhook
namespace: knative-serving
ownerReferences:
- apiVersion: operator.knative.dev/v1alpha1
blockOwnerDeletion: true
controller: true
kind: KnativeServing
name: knative-serving
uid: bf5961c1-8747-4332-b50b-749c6fed77fc
resourceVersion: "8571844"
uid: 9948ceea-8e7c-445d-baea-af62c2055f51
spec:
progressDeadlineSeconds: 600
replicas: 2
revisionHistoryLimit: 10
selector:
matchLabels:
app: webhook
role: webhook
strategy:
rollingUpdate:
maxSurge: 25%
maxUnavailable: 25%
type: RollingUpdate
template:
metadata:
annotations:
cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
creationTimestamp: null
labels:
app: webhook
app.kubernetes.io/name: knative-serving
app.kubernetes.io/version: 1.4.0
role: webhook
spec:
affinity:
podAntiAffinity:
preferredDuringSchedulingIgnoredDuringExecution:
- podAffinityTerm:
labelSelector:
matchLabels:
app: webhook
topologyKey: kubernetes.io/hostname
weight: 100
containers:
- env:
- name: POD_NAME
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.name
- name: SYSTEM_NAMESPACE
valueFrom:
fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
- name: CONFIG_LOGGING_NAME
value: config-logging
- name: CONFIG_OBSERVABILITY_NAME
value: config-observability
- name: WEBHOOK_NAME
value: webhook
- name: WEBHOOK_PORT
value: "8443"
- name: METRICS_DOMAIN
value: knative.dev/internal/serving
- name: METRICS_PROMETHEUS_HOST
value: 127.0.0.1
image: registry.ci.openshift.org/openshift/knative-v1.4.0:knative-serving-webhook
imagePullPolicy: IfNotPresent
livenessProbe:
failureThreshold: 6
httpGet:
httpHeaders:
- name: k-kubelet-probe
value: webhook
path: /
port: 8443
scheme: HTTPS
initialDelaySeconds: 20
periodSeconds: 1
successThreshold: 1
timeoutSeconds: 1
name: webhook
ports:
- containerPort: 9090
name: metrics
protocol: TCP
- containerPort: 8008
name: profiling
protocol: TCP
- containerPort: 8443
name: https-webhook
protocol: TCP
readinessProbe:
failureThreshold: 3
httpGet:
httpHeaders:
- name: k-kubelet-probe
value: webhook
path: /
port: 8443
scheme: HTTPS
periodSeconds: 1
successThreshold: 1
timeoutSeconds: 1
resources:
limits:
cpu: 500m
memory: 1Gi
requests:
cpu: 100m
memory: 100Mi
securityContext:
allowPrivilegeEscalation: false
capabilities:
drop:
- all
readOnlyRootFilesystem: true
runAsNonRoot: true
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
- args:
- --secure-listen-address=0.0.0.0:8444
- --upstream=http://127.0.0.1:9090/
- --tls-cert-file=/etc/tls/private/tls.crt
- --tls-private-key-file=/etc/tls/private/tls.key
- --logtostderr=true
- --v=10
image: registry.ci.openshift.org/origin/4.7:kube-rbac-proxy
imagePullPolicy: IfNotPresent
name: kube-rbac-proxy
resources:
requests:
cpu: 10m
memory: 20Mi
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /etc/tls/private
name: secret-webhook-sm-service-tls
dnsPolicy: ClusterFirst
restartPolicy: Always
schedulerName: default-scheduler
securityContext: {}
serviceAccount: controller
serviceAccountName: controller
terminationGracePeriodSeconds: 300
volumes:
- name: secret-webhook-sm-service-tls
secret:
defaultMode: 420
secretName: webhook-sm-service-tls
status:
availableReplicas: 2
conditions:
- lastTransitionTime: "2022-08-31T21:50:33Z"
lastUpdateTime: "2022-08-31T21:51:07Z"
message: ReplicaSet "webhook-69cff6d4b9" has successfully progressed.
reason: NewReplicaSetAvailable
status: "True"
type: Progressing
- lastTransitionTime: "2022-09-02T09:52:17Z"
lastUpdateTime: "2022-09-02T09:52:17Z"
message: Deployment has minimum availability.
reason: MinimumReplicasAvailable
status: "True"
type: Available
observedGeneration: 1804
readyReplicas: 2
replicas: 2
updatedReplicas: 2
- relates to
-
SRVKS-1113 setting KnativeServing .spec.workloads[].replicas does work for HPA-enabled components (activator and webhook)
-
- Closed
-