Uploaded image for project: 'Knative Serving'
  1. Knative Serving
  2. SRVKS-959

webhook HPA attempt at scaling up apparently reverted by the operator

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Done
    • Icon: Major Major
    • 1.30.0
    • 1.25.0, 1.29.0
    • None
    • None

      When running KnativeServing with the default highAvailability setting (2 replicas by default) under a heavier load of creating ~300 ksvcs at the same time, I see the webhook HorizontalPodAutoscaler attempt to scale up the webhook replicas, but any new pods are immediately deleted:

      webhook-69cff6d4b9-2dhgd                                       2/2     Running       0          16h
      webhook-69cff6d4b9-7gj9b                                       2/2     Running       0          15h
      webhook-69cff6d4b9-fg8vj                                       1/2     Terminating   0          22s
      webhook-69cff6d4b9-l9rt8                                       2/2     Terminating   0          7s
      webhook-69cff6d4b9-sdnwr                                       2/2     Terminating   0          7s
      

      I suspect the operator is overriding the `replicas` spec that was updated by the HPA.

      Notice that the manifestival update, which includes the "replicas" field, is recorded one second after the kube-controller-manager update:

      apiVersion: apps/v1
      kind: Deployment
      metadata:
        annotations:
          deployment.kubernetes.io/revision: "1"
          kubectl.kubernetes.io/last-applied-configuration: |
            {"apiVersion":"apps/v1","kind":"Deployment","metadata":{"creationTimestamp":null,"labels":{"app.kubernetes.io/component":"webhook","app.kubernetes.io/name":"knative-serving","app.kubernetes.io/version":"1.4.0"},"name":"webhook","namespace":"knative-serving","ownerReferences":[{"apiVersion":"operator.knative.dev/v1alpha1","blockOwnerDeletion":true,"controller":true,"kind":"KnativeServing","name":"knative-serving","uid":"bf5961c1-8747-4332-b50b-749c6fed77fc"}]},"spec":{"replicas":2,"selector":{"matchLabels":{"app":"webhook","role":"webhook"}},"strategy":{},"template":{"metadata":{"annotations":{"cluster-autoscaler.kubernetes.io/safe-to-evict":"false"},"creationTimestamp":null,"labels":{"app":"webhook","app.kubernetes.io/name":"knative-serving","app.kubernetes.io/version":"1.4.0","role":"webhook"}},"spec":{"affinity":{"podAntiAffinity":{"preferredDuringSchedulingIgnoredDuringExecution":[{"podAffinityTerm":{"labelSelector":{"matchLabels":{"app":"webhook"}},"topologyKey":"kubernetes.io/hostname"},"weight":100}]}},"containers":[{"env":[{"name":"POD_NAME","valueFrom":{"fieldRef":{"fieldPath":"metadata.name"}}},{"name":"SYSTEM_NAMESPACE","valueFrom":{"fieldRef":{"fieldPath":"metadata.namespace"}}},{"name":"CONFIG_LOGGING_NAME","value":"config-logging"},{"name":"CONFIG_OBSERVABILITY_NAME","value":"config-observability"},{"name":"WEBHOOK_NAME","value":"webhook"},{"name":"WEBHOOK_PORT","value":"8443"},{"name":"METRICS_DOMAIN","value":"knative.dev/internal/serving"},{"name":"METRICS_PROMETHEUS_HOST","value":"127.0.0.1"}],"image":"registry.ci.openshift.org/openshift/knative-v1.4.0:knative-serving-webhook","livenessProbe":{"failureThreshold":6,"httpGet":{"httpHeaders":[{"name":"k-kubelet-probe","value":"webhook"}],"port":8443,"scheme":"HTTPS"},"initialDelaySeconds":20,"periodSeconds":1},"name":"webhook","ports":[{"containerPort":9090,"name":"metrics"},{"containerPort":8008,"name":"profiling"},{"containerPort":8443,"name":"https-webhook"}],"readinessProbe":{"httpGet":{
"httpHeaders":[{"name":"k-kubelet-probe","value":"webhook"}],"port":8443,"scheme":"HTTPS"},"periodSeconds":1},"resources":{"limits":{"cpu":"500m","memory":"1Gi"},"requests":{"cpu":"100m","memory":"100Mi"}},"securityContext":{"allowPrivilegeEscalation":false,"capabilities":{"drop":["all"]},"readOnlyRootFilesystem":true,"runAsNonRoot":true}},{"args":["--secure-listen-address=0.0.0.0:8444","--upstream=http://127.0.0.1:9090/","--tls-cert-file=/etc/tls/private/tls.crt","--tls-private-key-file=/etc/tls/private/tls.key","--logtostderr=true","--v=10"],"image":"registry.ci.openshift.org/origin/4.7:kube-rbac-proxy","name":"kube-rbac-proxy","resources":{"requests":{"cpu":"10m","memory":"20Mi"}},"volumeMounts":[{"mountPath":"/etc/tls/private","name":"secret-webhook-sm-service-tls"}]}],"serviceAccountName":"controller","terminationGracePeriodSeconds":300,"volumes":[{"name":"secret-webhook-sm-service-tls","secret":{"secretName":"webhook-sm-service-tls"}}]}}},"status":{}}
        creationTimestamp: "2022-08-31T21:50:33Z"
        generation: 1804
        labels:
          app.kubernetes.io/component: webhook
          app.kubernetes.io/name: knative-serving
          app.kubernetes.io/version: 1.4.0
        managedFields:
        - apiVersion: apps/v1
          fieldsType: FieldsV1
          fieldsV1:
            f:metadata:
              f:annotations:
                f:deployment.kubernetes.io/revision: {}
            f:status:
              f:availableReplicas: {}
              f:conditions:
                .: {}
                k:{"type":"Available"}:
                  .: {}
                  f:lastTransitionTime: {}
                  f:lastUpdateTime: {}
                  f:message: {}
                  f:reason: {}
                  f:status: {}
                  f:type: {}
                k:{"type":"Progressing"}:
                  .: {}
                  f:lastTransitionTime: {}
                  f:lastUpdateTime: {}
                  f:message: {}
                  f:reason: {}
                  f:status: {}
                  f:type: {}
              f:observedGeneration: {}
              f:readyReplicas: {}
              f:replicas: {}
              f:updatedReplicas: {}
          manager: kube-controller-manager
          operation: Update
          subresource: status
          time: "2022-09-02T09:52:17Z"
        - apiVersion: apps/v1
          fieldsType: FieldsV1
          fieldsV1:
            f:metadata:
              f:annotations:
                .: {}
                f:kubectl.kubernetes.io/last-applied-configuration: {}
              f:labels:
                .: {}
                f:app.kubernetes.io/component: {}
                f:app.kubernetes.io/name: {}
                f:app.kubernetes.io/version: {}
              f:ownerReferences:
                .: {}
                k:{"uid":"bf5961c1-8747-4332-b50b-749c6fed77fc"}: {}
            f:spec:
              f:progressDeadlineSeconds: {}
              f:replicas: {}
              f:revisionHistoryLimit: {}
              f:selector: {}
              f:strategy:
                f:rollingUpdate:
                  .: {}
                  f:maxSurge: {}
                  f:maxUnavailable: {}
                f:type: {}
              f:template:
                f:metadata:
                  f:annotations:
                    .: {}
                    f:cluster-autoscaler.kubernetes.io/safe-to-evict: {}
                  f:labels:
                    .: {}
                    f:app: {}
                    f:app.kubernetes.io/name: {}
                    f:app.kubernetes.io/version: {}
                    f:role: {}
                f:spec:
                  f:affinity:
                    .: {}
                    f:podAntiAffinity:
                      .: {}
                      f:preferredDuringSchedulingIgnoredDuringExecution: {}
                  f:containers:
                    k:{"name":"kube-rbac-proxy"}:
                      .: {}
                      f:args: {}
                      f:image: {}
                      f:imagePullPolicy: {}
                      f:name: {}
                      f:resources:
                        .: {}
                        f:requests:
                          .: {}
                          f:cpu: {}
                          f:memory: {}
                      f:terminationMessagePath: {}
                      f:terminationMessagePolicy: {}
                      f:volumeMounts:
                        .: {}
                        k:{"mountPath":"/etc/tls/private"}:
                          .: {}
                          f:mountPath: {}
                          f:name: {}
                    k:{"name":"webhook"}:
                      .: {}
                      f:env:
                        .: {}
                        k:{"name":"CONFIG_LOGGING_NAME"}:
                          .: {}
                          f:name: {}
                          f:value: {}
                        k:{"name":"CONFIG_OBSERVABILITY_NAME"}:
                          .: {}
                          f:name: {}
                          f:value: {}
                        k:{"name":"METRICS_DOMAIN"}:
                          .: {}
                          f:name: {}
                          f:value: {}
                        k:{"name":"METRICS_PROMETHEUS_HOST"}:
                          .: {}
                          f:name: {}
                          f:value: {}
                        k:{"name":"POD_NAME"}:
                          .: {}
                          f:name: {}
                          f:valueFrom:
                            .: {}
                            f:fieldRef: {}
                        k:{"name":"SYSTEM_NAMESPACE"}:
                          .: {}
                          f:name: {}
                          f:valueFrom:
                            .: {}
                            f:fieldRef: {}
                        k:{"name":"WEBHOOK_NAME"}:
                          .: {}
                          f:name: {}
                          f:value: {}
                        k:{"name":"WEBHOOK_PORT"}:
                          .: {}
                          f:name: {}
                          f:value: {}
                      f:image: {}
                      f:imagePullPolicy: {}
                      f:livenessProbe:
                        .: {}
                        f:failureThreshold: {}
                        f:httpGet:
                          .: {}
                          f:httpHeaders: {}
                          f:path: {}
                          f:port: {}
                          f:scheme: {}
                        f:initialDelaySeconds: {}
                        f:periodSeconds: {}
                        f:successThreshold: {}
                        f:timeoutSeconds: {}
                      f:name: {}
                      f:ports:
                        .: {}
                        k:{"containerPort":8008,"protocol":"TCP"}:
                          .: {}
                          f:containerPort: {}
                          f:name: {}
                          f:protocol: {}
                        k:{"containerPort":8443,"protocol":"TCP"}:
                          .: {}
                          f:containerPort: {}
                          f:name: {}
                          f:protocol: {}
                        k:{"containerPort":9090,"protocol":"TCP"}:
                          .: {}
                          f:containerPort: {}
                          f:name: {}
                          f:protocol: {}
                      f:readinessProbe:
                        .: {}
                        f:failureThreshold: {}
                        f:httpGet:
                          .: {}
                          f:httpHeaders: {}
                          f:path: {}
                          f:port: {}
                          f:scheme: {}
                        f:periodSeconds: {}
                        f:successThreshold: {}
                        f:timeoutSeconds: {}
                      f:resources:
                        .: {}
                        f:limits:
                          .: {}
                          f:cpu: {}
                          f:memory: {}
                        f:requests:
                          .: {}
                          f:cpu: {}
                          f:memory: {}
                      f:securityContext:
                        .: {}
                        f:allowPrivilegeEscalation: {}
                        f:capabilities:
                          .: {}
                          f:drop: {}
                        f:readOnlyRootFilesystem: {}
                        f:runAsNonRoot: {}
                      f:terminationMessagePath: {}
                      f:terminationMessagePolicy: {}
                  f:dnsPolicy: {}
                  f:restartPolicy: {}
                  f:schedulerName: {}
                  f:securityContext: {}
                  f:serviceAccount: {}
                  f:serviceAccountName: {}
                  f:terminationGracePeriodSeconds: {}
                  f:volumes:
                    .: {}
                    k:{"name":"secret-webhook-sm-service-tls"}:
                      .: {}
                      f:name: {}
                      f:secret:
                        .: {}
                        f:defaultMode: {}
                        f:secretName: {}
          manager: manifestival
          operation: Update
          time: "2022-09-02T09:52:18Z"
        name: webhook
        namespace: knative-serving
        ownerReferences:
        - apiVersion: operator.knative.dev/v1alpha1
          blockOwnerDeletion: true
          controller: true
          kind: KnativeServing
          name: knative-serving
          uid: bf5961c1-8747-4332-b50b-749c6fed77fc
        resourceVersion: "8571844"
        uid: 9948ceea-8e7c-445d-baea-af62c2055f51
      spec:
        progressDeadlineSeconds: 600
        replicas: 2
        revisionHistoryLimit: 10
        selector:
          matchLabels:
            app: webhook
            role: webhook
        strategy:
          rollingUpdate:
            maxSurge: 25%
            maxUnavailable: 25%
          type: RollingUpdate
        template:
          metadata:
            annotations:
              cluster-autoscaler.kubernetes.io/safe-to-evict: "false"
            creationTimestamp: null
            labels:
              app: webhook
              app.kubernetes.io/name: knative-serving
              app.kubernetes.io/version: 1.4.0
              role: webhook
          spec:
            affinity:
              podAntiAffinity:
                preferredDuringSchedulingIgnoredDuringExecution:
                - podAffinityTerm:
                    labelSelector:
                      matchLabels:
                        app: webhook
                    topologyKey: kubernetes.io/hostname
                  weight: 100
            containers:
            - env:
              - name: POD_NAME
                valueFrom:
                  fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.name
              - name: SYSTEM_NAMESPACE
                valueFrom:
                  fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.namespace
              - name: CONFIG_LOGGING_NAME
                value: config-logging
              - name: CONFIG_OBSERVABILITY_NAME
                value: config-observability
              - name: WEBHOOK_NAME
                value: webhook
              - name: WEBHOOK_PORT
                value: "8443"
              - name: METRICS_DOMAIN
                value: knative.dev/internal/serving
              - name: METRICS_PROMETHEUS_HOST
                value: 127.0.0.1
              image: registry.ci.openshift.org/openshift/knative-v1.4.0:knative-serving-webhook
              imagePullPolicy: IfNotPresent
              livenessProbe:
                failureThreshold: 6
                httpGet:
                  httpHeaders:
                  - name: k-kubelet-probe
                    value: webhook
                  path: /
                  port: 8443
                  scheme: HTTPS
                initialDelaySeconds: 20
                periodSeconds: 1
                successThreshold: 1
                timeoutSeconds: 1
              name: webhook
              ports:
              - containerPort: 9090
                name: metrics
                protocol: TCP
              - containerPort: 8008
                name: profiling
                protocol: TCP
              - containerPort: 8443
                name: https-webhook
                protocol: TCP
              readinessProbe:
                failureThreshold: 3
                httpGet:
                  httpHeaders:
                  - name: k-kubelet-probe
                    value: webhook
                  path: /
                  port: 8443
                  scheme: HTTPS
                periodSeconds: 1
                successThreshold: 1
                timeoutSeconds: 1
              resources:
                limits:
                  cpu: 500m
                  memory: 1Gi
                requests:
                  cpu: 100m
                  memory: 100Mi
              securityContext:
                allowPrivilegeEscalation: false
                capabilities:
                  drop:
                  - all
                readOnlyRootFilesystem: true
                runAsNonRoot: true
              terminationMessagePath: /dev/termination-log
              terminationMessagePolicy: File
            - args:
              - --secure-listen-address=0.0.0.0:8444
              - --upstream=http://127.0.0.1:9090/
              - --tls-cert-file=/etc/tls/private/tls.crt
              - --tls-private-key-file=/etc/tls/private/tls.key
              - --logtostderr=true
              - --v=10
              image: registry.ci.openshift.org/origin/4.7:kube-rbac-proxy
              imagePullPolicy: IfNotPresent
              name: kube-rbac-proxy
              resources:
                requests:
                  cpu: 10m
                  memory: 20Mi
              terminationMessagePath: /dev/termination-log
              terminationMessagePolicy: File
              volumeMounts:
              - mountPath: /etc/tls/private
                name: secret-webhook-sm-service-tls
            dnsPolicy: ClusterFirst
            restartPolicy: Always
            schedulerName: default-scheduler
            securityContext: {}
            serviceAccount: controller
            serviceAccountName: controller
            terminationGracePeriodSeconds: 300
            volumes:
            - name: secret-webhook-sm-service-tls
              secret:
                defaultMode: 420
                secretName: webhook-sm-service-tls
      status:
        availableReplicas: 2
        conditions:
        - lastTransitionTime: "2022-08-31T21:50:33Z"
          lastUpdateTime: "2022-08-31T21:51:07Z"
          message: ReplicaSet "webhook-69cff6d4b9" has successfully progressed.
          reason: NewReplicaSetAvailable
          status: "True"
          type: Progressing
        - lastTransitionTime: "2022-09-02T09:52:17Z"
          lastUpdateTime: "2022-09-02T09:52:17Z"
          message: Deployment has minimum availability.
          reason: MinimumReplicasAvailable
          status: "True"
          type: Available
        observedGeneration: 1804
        readyReplicas: 2
        replicas: 2
        updatedReplicas: 2
      

              rhn-support-knakayam Kenjiro Nakayama (Inactive)
              maschmid@redhat.com Marek Schmidt
              Marek Schmidt Marek Schmidt
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

                Created:
                Updated:
                Resolved: