OpenShift API for Data Protection / OADP-712

Downstream operator installation fails on ppc64le


      Description of problem:

       

      The operator installation using the automation script from https://gitlab.cee.redhat.com/migrationqe/oadp-qe-automation fails on a ppc64le OCP 4.11 cluster.

      The installation fails while the CatalogSource pod is being created from the index_image obtained from datagrepper:

       

       

      + /usr/local/bin/oc wait pods --namespace=openshift-marketplace --selector=olm.catalogSource=prestage-operators --for=condition=Ready --timeout=5m
      error: timed out waiting for the condition on pods/prestage-operators-vdcpk
      
      [root@rdr-sg-e2e-3550-tok04-bastion-0] oc describe pod prestage-operators-vdcpk -n openshift-marketplace
      .
      .
       Events:
        Type     Reason          Age               From               Message
        ----     ------          ----              ----               -------
        Normal   Scheduled       48s               default-scheduler  Successfully assigned openshift-marketplace/prestage-operators-vdcpk to tok04-worker-0.rdr-sg-e2e-3550.ibm.com by tok04-master-1.rdr-sg-e2e-3550.ibm.com
        Normal   AddedInterface  46s               multus             Add eth0 [10.128.2.135/23] from openshift-sdn
        Normal   Pulling         46s               kubelet            Pulling image "brew.registry.redhat.io/rh-osbs/iib:289368"
        Normal   Pulled          35s               kubelet            Successfully pulled image "brew.registry.redhat.io/rh-osbs/iib:289368" in 11.272099099s
        Normal   Created         34s               kubelet            Created container registry-server
        Normal   Started         34s               kubelet            Started container registry-server
        Warning  Unhealthy       7s (x3 over 27s)  kubelet            Readiness probe failed: timeout: failed to connect service ":50051" within 1s
        Warning  Unhealthy       7s (x2 over 17s)  kubelet            Liveness probe failed: timeout: failed to connect service ":50051" within 1s 
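
      A minimal sketch for checking the registry endpoint by hand, assuming the grpc_health_probe binary shipped in the iib image supports the standard -connect-timeout flag (the probe failure message above appears to come from its default 1s connect timeout):

      # Run the same health check the kubelet runs, but allow more than the
      # default 1s to connect (pod name and namespace as in this report).
      oc exec -n openshift-marketplace prestage-operators-vdcpk -- \
        grpc_health_probe -addr=:50051 -connect-timeout=5s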
      
      

       

       

      Pod details -

      [root@rdr-sg-e2e-3550-tok04-bastion-0 oadp-qe-automation]# oc get pods prestage-operators-vdcpk -n openshift-marketplace -oyaml
      apiVersion: v1
      kind: Pod
      metadata:
        annotations:
          cluster-autoscaler.kubernetes.io/safe-to-evict: "true"
          k8s.v1.cni.cncf.io/network-status: |-
            [{
                "name": "openshift-sdn",
                "interface": "eth0",
                "ips": [
                    "10.128.2.144"
                ],
                "default": true,
                "dns": {}
            }]
          k8s.v1.cni.cncf.io/networks-status: |-
            [{
                "name": "openshift-sdn",
                "interface": "eth0",
                "ips": [
                    "10.128.2.144"
                ],
                "default": true,
                "dns": {}
            }]
          kubectl.kubernetes.io/last-applied-configuration: |
            {"apiVersion":"operators.coreos.com/v1alpha1","kind":"CatalogSource","metadata":{"annotations":{},"name":"prestage-operators","namespace":"openshift-marketplace"},"spec":{"displayName":"Custom Operator Catalog","image":"brew.registry.redhat.io/rh-osbs/iib:289368","publisher":"grpc","sourceType":"grpc"}}
          openshift.io/scc: anyuid
        creationTimestamp: "2022-08-10T06:29:05Z"
        generateName: prestage-operators-
        labels:
          olm.catalogSource: prestage-operators
          olm.pod-spec-hash: 7f58fdb856
        name: prestage-operators-vdcpk
        namespace: openshift-marketplace
        ownerReferences:
        - apiVersion: operators.coreos.com/v1alpha1
          blockOwnerDeletion: false
          controller: false
          kind: CatalogSource
          name: prestage-operators
          uid: 8276c1c1-0f81-4243-9eee-062827775f01
        resourceVersion: "140053669"
        uid: 28913c9b-0ecb-49e8-9842-b35a8392fdab
      spec:
        containers:
        - image: brew.registry.redhat.io/rh-osbs/iib:289368
          imagePullPolicy: Always
          livenessProbe:
            exec:
              command:
              - grpc_health_probe
              - -addr=:50051
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          name: registry-server
          ports:
          - containerPort: 50051
            name: grpc
            protocol: TCP
          readinessProbe:
            exec:
              command:
              - grpc_health_probe
              - -addr=:50051
            failureThreshold: 3
            initialDelaySeconds: 5
            periodSeconds: 10
            successThreshold: 1
            timeoutSeconds: 5
          resources:
            requests:
              cpu: 10m
              memory: 50Mi
          securityContext:
            capabilities:
              drop:
              - MKNOD
            readOnlyRootFilesystem: false
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: FallbackToLogsOnError
          volumeMounts:
          - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
            name: kube-api-access-h96rj
            readOnly: true
        dnsPolicy: ClusterFirst
        enableServiceLinks: true
        nodeName: tok04-worker-0.rdr-sg-e2e-3550.ibm.com
        nodeSelector:
          kubernetes.io/os: linux
        preemptionPolicy: PreemptLowerPriority
        priority: 0
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext:
          seLinuxOptions:
            level: s0:c15,c10
        serviceAccount: prestage-operators
        serviceAccountName: prestage-operators
        terminationGracePeriodSeconds: 30
        tolerations:
        - effect: NoExecute
          key: node.kubernetes.io/not-ready
          operator: Exists
          tolerationSeconds: 300
        - effect: NoExecute
          key: node.kubernetes.io/unreachable
          operator: Exists
          tolerationSeconds: 300
        - effect: NoSchedule
          key: node.kubernetes.io/memory-pressure
          operator: Exists
        volumes:
        - name: kube-api-access-h96rj
          projected:
            defaultMode: 420
            sources:
            - serviceAccountToken:
                expirationSeconds: 3607
                path: token
            - configMap:
                items:
                - key: ca.crt
                  path: ca.crt
                name: kube-root-ca.crt
            - downwardAPI:
                items:
                - fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.namespace
                  path: namespace
            - configMap:
                items:
                - key: service-ca.crt
                  path: service-ca.crt
                name: openshift-service-ca.crt
      status:
        conditions:
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T06:29:05Z"
          status: "True"
          type: Initialized
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T06:29:05Z"
          message: 'containers with unready status: [registry-server]'
          reason: ContainersNotReady
          status: "False"
          type: Ready
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T06:29:05Z"
          message: 'containers with unready status: [registry-server]'
          reason: ContainersNotReady
          status: "False"
          type: ContainersReady
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T06:29:05Z"
          status: "True"
          type: PodScheduled
        containerStatuses:
        - containerID: cri-o://b24929e8349f61feb8497fd6d8b34a6993636a7f01bb366410adb0365e96af25
          image: brew.registry.redhat.io/rh-osbs/iib:289368
          imageID: brew.registry.redhat.io/rh-osbs/iib@sha256:df1c5a1f81f6b47c5c05f0c2a93b8442ed9013bf1458bb7105769145ac113f1a
          lastState:
            terminated:
              containerID: cri-o://ff1bebc05f76f7be391e786cf9fa7c36c40ac49f6b503dc34c2c481853185673
              exitCode: 2
              finishedAt: "2022-08-10T08:34:16Z"
              reason: Error
              startedAt: "2022-08-10T08:33:39Z"
          name: registry-server
          ready: false
          restartCount: 44
          started: true
          state:
            running:
              startedAt: "2022-08-10T08:39:27Z"
        hostIP: 193.168.200.112
        phase: Running
        podIP: 10.128.2.144
        podIPs:
        - ip: 10.128.2.144
        qosClass: Burstable
        startTime: "2022-08-10T06:29:05Z"
       

       

      Version-Release number of selected component (if applicable):

      Tested for OADP 1.1.0-59 on OCP 4.11 ppc64le

       

      How reproducible:

       

      Steps to Reproduce:
      1. Since the bastion node is not connected to the Red Hat VPN, the IIB index_image was obtained on a Windows machine that is connected to the Red Hat VPN (a sketch of deriving the IIB_IMAGE value for step 3 from this output follows step 3 below):

      ubuntu@DESKTOP-Q4AHE7B:~$ curl -k https://datagrepper.engineering.redhat.com/raw?topic\=/topic/VirtualTopic.eng.ci.redhat-container-image.index.built\&contains\=oadp-operator-bundle-container-1.1\&rows_per_page\=1 | jq -r '.raw_messages[0].msg.index.index_image'
        % Total    % Received % Xferd  Average Speed   Time    Time     Time  Current
                                       Dload  Upload   Total   Spent    Left  Speed
      100  3388  100  3388    0     0   1906      0  0:00:01  0:00:01 --:--:--  1905
      registry-proxy.engineering.redhat.com/rh-osbs/iib:289368
        

      2. Clone the automation repository and copy it to the bastion node:

      git clone https://gitlab.cee.redhat.com/migrationqe/oadp-qe-automation

      3. Execute the deploy_oadp.sh script to install the operator -

       

      cd oadp-qe-automation
      export REPOSITORY='prestage' 
      export IP_APPROVAL='Manual' 
      export STREAM='downstream' 
      export OADP_VERSION="1.1.0" 
      export IIB_IMAGE='iib:289368' 
      bash oadp/deploy_oadp.sh 
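
      For reference, a minimal sketch of deriving the IIB_IMAGE value used above from the index_image returned by the datagrepper query in step 1 (it has to run on a host that can reach datagrepper, i.e. one on the Red Hat VPN); the catalog pod then pulls the same iib:289368 tag from brew.registry.redhat.io, as seen in the pod spec above:

      # Same query as step 1; only the trailing iib:<build> component of the
      # returned pull spec is exported as IIB_IMAGE.
      INDEX_IMAGE=$(curl -sk "https://datagrepper.engineering.redhat.com/raw?topic=/topic/VirtualTopic.eng.ci.redhat-container-image.index.built&contains=oadp-operator-bundle-container-1.1&rows_per_page=1" \
        | jq -r '.raw_messages[0].msg.index.index_image')
      export IIB_IMAGE="${INDEX_IMAGE##*/}"    # registry-proxy.../rh-osbs/iib:289368 -> iib:289368
      echo "${IIB_IMAGE}"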

       

       

      Actual results:

      The catalog source pod repeatedly fails its liveness and readiness probes (restartCount had reached 44 in the dump above), so the catalog never becomes Ready and the operator installation times out.
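
      The crash loop can be observed with the same label selector the automation's wait step uses:

      # Watch the catalog pod restart on the failing probes, and list its events
      # (selector, namespace and pod name as in the description above).
      oc get pods -n openshift-marketplace -l olm.catalogSource=prestage-operators -w
      oc get events -n openshift-marketplace \
        --field-selector involvedObject.name=prestage-operators-vdcpk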

       

      Expected results:

      The prestage-operators catalog source pod becomes Ready and the OADP operator installation completes.

      Additional info:

      Tried creating a test pod using the same index_image but with periodSeconds set to 20 for both probes; the pod started successfully and became Ready -

       

       

      [root@rdr-sg-e2e-3550-tok04-bastion-0 oadp-qe-automation]# oc get pod/test -oyaml
      apiVersion: v1
      kind: Pod
      metadata:
        annotations:
          k8s.v1.cni.cncf.io/network-status: |-
            [{
                "name": "openshift-sdn",
                "interface": "eth0",
                "ips": [
                    "10.128.2.164"
                ],
                "default": true,
                "dns": {}
            }]
          k8s.v1.cni.cncf.io/networks-status: |-
            [{
                "name": "openshift-sdn",
                "interface": "eth0",
                "ips": [
                    "10.128.2.164"
                ],
                "default": true,
                "dns": {}
            }]
          openshift.io/scc: anyuid
        creationTimestamp: "2022-08-10T07:59:03Z"
        labels:
          run: test
        name: test
        namespace: openshift-adp
        resourceVersion: "140003544"
        uid: 152a7070-2852-428c-9779-fa665b6469d8
      spec:
        containers:
        - image: brew.registry.redhat.io/rh-osbs/iib:289368
          imagePullPolicy: IfNotPresent
          livenessProbe:
            exec:
              command:
              - grpc_health_probe
              - -addr=:50051
            failureThreshold: 3
            initialDelaySeconds: 10
            periodSeconds: 20
            successThreshold: 1
            timeoutSeconds: 5
          name: test
          ports:
          - containerPort: 50051
            name: grpc
            protocol: TCP
          readinessProbe:
            exec:
              command:
              - grpc_health_probe
              - -addr=:50051
            failureThreshold: 3
            initialDelaySeconds: 5
            periodSeconds: 20
            successThreshold: 1
            timeoutSeconds: 5
          resources: {}
          securityContext:
            capabilities:
              drop:
              - MKNOD
          terminationMessagePath: /dev/termination-log
          terminationMessagePolicy: File
          volumeMounts:
          - mountPath: /var/run/secrets/kubernetes.io/serviceaccount
            name: kube-api-access-8vglv
            readOnly: true
        dnsPolicy: ClusterFirst
        enableServiceLinks: true
        imagePullSecrets:
        - name: default-dockercfg-2mh5d
        nodeName: tok04-worker-0.rdr-sg-e2e-3550.ibm.com
        preemptionPolicy: PreemptLowerPriority
        priority: 0
        restartPolicy: Always
        schedulerName: default-scheduler
        securityContext:
          seLinuxOptions:
            level: s0:c28,c2
        serviceAccount: default
        serviceAccountName: default
        terminationGracePeriodSeconds: 30
        tolerations:
        - effect: NoExecute
          key: node.kubernetes.io/not-ready
          operator: Exists
          tolerationSeconds: 300
        - effect: NoExecute
          key: node.kubernetes.io/unreachable
          operator: Exists
          tolerationSeconds: 300
        volumes:
        - name: kube-api-access-8vglv
          projected:
            defaultMode: 420
            sources:
            - serviceAccountToken:
                expirationSeconds: 3607
                path: token
            - configMap:
                items:
                - key: ca.crt
                  path: ca.crt
                name: kube-root-ca.crt
            - downwardAPI:
                items:
                - fieldRef:
                    apiVersion: v1
                    fieldPath: metadata.namespace
                  path: namespace
            - configMap:
                items:
                - key: service-ca.crt
                  path: service-ca.crt
                name: openshift-service-ca.crt
      status:
        conditions:
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T07:59:03Z"
          status: "True"
          type: Initialized
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T08:00:03Z"
          status: "True"
          type: Ready
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T08:00:03Z"
          status: "True"
          type: ContainersReady
        - lastProbeTime: null
          lastTransitionTime: "2022-08-10T07:59:03Z"
          status: "True"
          type: PodScheduled
        containerStatuses:
        - containerID: cri-o://53a3ab8973351c0da078703e48214c9f1112da988fe1e854431a9bb222e6ef79
          image: brew.registry.redhat.io/rh-osbs/iib:289368
          imageID: brew.registry.redhat.io/rh-osbs/iib@sha256:df1c5a1f81f6b47c5c05f0c2a93b8442ed9013bf1458bb7105769145ac113f1a
          lastState: {}
          name: test
          ready: true
          restartCount: 0
          started: true
          state:
            running:
              startedAt: "2022-08-10T07:59:05Z"
        hostIP: 193.168.200.112
        phase: Running
        podIP: 10.128.2.164
        podIPs:
        - ip: 10.128.2.164
        qosClass: BestEffort   
        startTime: "2022-08-10T07:59:03Z" 

       

       

       

       
