-
Bug
-
Resolution: Won't Do
-
Major
-
OADP 1.1.0
-
None
-
False
-
-
False
-
ToDo
-
0
-
0
-
Very Likely
-
0
-
None
-
Unset
-
Unknown
-
No
Description of problem:
The operator installation using the automation script from https://gitlab.cee.redhat.com/migrationqe/oadp-qe-automation fails on a ppc64le OCP 4.11 cluster.
The installation fails while creating the catalogsource pod from the index_image obtained from datagrepper:
+ /usr/local/bin/oc wait pods --namespace=openshift-marketplace --selector=olm.catalogSource=prestage-operators --for=condition=Ready --timeout=5m error: timed out waiting for the condition on pods/prestage-operators-vdcpk [root@rdr-sg-e2e-3550-tok04-bastion-0] oc describe pod prestage-operators-vdcpk -n openshift-marketplace . . Events: Type Reason Age From Message ---- ------ ---- ---- ------- Normal Scheduled 48s default-scheduler Successfully assigned openshift-marketplace/prestage-operators-vdcpk to tok04-worker-0.rdr-sg-e2e-3550.ibm.com by tok04-master-1.rdr-sg-e2e-3550.ibm.com Normal AddedInterface 46s multus Add eth0 [10.128.2.135/23] from openshift-sdn Normal Pulling 46s kubelet Pulling image "brew.registry.redhat.io/rh-osbs/iib:289368" Normal Pulled 35s kubelet Successfully pulled image "brew.registry.redhat.io/rh-osbs/iib:289368" in 11.272099099s Normal Created 34s kubelet Created container registry-server Normal Started 34s kubelet Started container registry-server Warning Unhealthy 7s (x3 over 27s) kubelet Readiness probe failed: timeout: failed to connect service ":50051" within 1s Warning Unhealthy 7s (x2 over 17s) kubelet Liveness probe failed: timeout: failed to connect service ":50051" within 1s
Pod details -
[root@rdr-sg-e2e-3550-tok04-bastion-0 oadp-qe-automation]# oc get pods prestage-operators-vdcpk -n openshift-marketplace -oyaml apiVersion: v1 kind: Pod metadata: annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "true" k8s.v1.cni.cncf.io/network-status: |- [{ "name": "openshift-sdn", "interface": "eth0", "ips": [ "10.128.2.144" ], "default": true, "dns": {} }] k8s.v1.cni.cncf.io/networks-status: |- [{ "name": "openshift-sdn", "interface": "eth0", "ips": [ "10.128.2.144" ], "default": true, "dns": {} }] kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"operators.coreos.com/v1alpha1","kind":"CatalogSource","metadata":{"annotations":{},"name":"prestage-operators","namespace":"openshift-marketplace"},"spec":{"displayName":"Custom Operator Catalog","image":"brew.registry.redhat.io/rh-osbs/iib:289368","publisher":"grpc","sourceType":"grpc"}} openshift.io/scc: anyuid creationTimestamp: "2022-08-10T06:29:05Z" generateName: prestage-operators- labels: olm.catalogSource: prestage-operators olm.pod-spec-hash: 7f58fdb856 name: prestage-operators-vdcpk namespace: openshift-marketplace ownerReferences: - apiVersion: operators.coreos.com/v1alpha1 blockOwnerDeletion: false controller: false kind: CatalogSource name: prestage-operators uid: 8276c1c1-0f81-4243-9eee-062827775f01 resourceVersion: "140053669" uid: 28913c9b-0ecb-49e8-9842-b35a8392fdab spec: containers: - image: brew.registry.redhat.io/rh-osbs/iib:289368 imagePullPolicy: Always livenessProbe: exec: command: - grpc_health_probe - -addr=:50051 failureThreshold: 3 initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 name: registry-server ports: - containerPort: 50051 name: grpc protocol: TCP readinessProbe: exec: command: - grpc_health_probe - -addr=:50051 failureThreshold: 3 initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 10m memory: 50Mi securityContext: capabilities: drop: - MKNOD readOnlyRootFilesystem: false 
terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h96rj readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true nodeName: tok04-worker-0.rdr-sg-e2e-3550.ibm.com nodeSelector: kubernetes.io/os: linux preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: seLinuxOptions: level: s0:c15,c10 serviceAccount: prestage-operators serviceAccountName: prestage-operators terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: kube-api-access-h96rj projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" message: 'containers with unready status: [registry-server]' reason: ContainersNotReady status: "False" type: Ready - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" message: 'containers with unready status: [registry-server]' reason: ContainersNotReady status: "False" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" status: "True" type: PodScheduled containerStatuses: - containerID: cri-o://b24929e8349f61feb8497fd6d8b34a6993636a7f01bb366410adb0365e96af25 image: 
brew.registry.redhat.io/rh-osbs/iib:289368 imageID: brew.registry.redhat.io/rh-osbs/iib@sha256:df1c5a1f81f6b47c5c05f0c2a93b8442ed9013bf1458bb7105769145ac113f1a lastState: terminated: containerID: cri-o://ff1bebc05f76f7be391e786cf9fa7c36c40ac49f6b503dc34c2c481853185673 exitCode: 2 finishedAt: "2022-08-10T08:34:16Z" reason: Error startedAt: "2022-08-10T08:33:39Z" name: registry-server ready: false restartCount: 44 started: true state: running: startedAt: "2022-08-10T08:39:27Z" hostIP: 193.168.200.112 phase: Running podIP: 10.128.2.144 podIPs: - ip: 10.128.2.144 qosClass: Burstable startTime: "2022-08-10T06:29:05Z"
Version-Release number of selected component (if applicable):
Tested for OADP 1.1.0-59 on OCP 4.11 ppc64le
How reproducible:
Steps to Reproduce:
1. Since the bastion node is not connected to the Red Hat VPN, obtained the IIB index_image on a Windows machine that is connected to the Red Hat VPN:
ubuntu@DESKTOP-Q4AHE7B:~$ curl -k https://datagrepper.engineering.redhat.com/raw?topic\=/topic/VirtualTopic.eng.ci.redhat-container-image.index.built\&contains\=oadp-operator-bundle-container-1.1\&rows_per_page\=1 | jq -r '.raw_messages[0].msg.index.index_image' % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 3388 100 3388 0 0 1906 0 0:00:01 0:00:01 -::- 1905 registry-proxy.engineering.redhat.com/rh-osbs/iib:289368
2. Cloned the automation repository and copied it to the bastion node:
git clone https://gitlab.cee.redhat.com/migrationqe/oadp-qe-automation
3. Executed the deploy_oadp.sh script to install the operator:
cd oadp-qe-automation export REPOSITORY='prestage' export IP_APPROVAL='Manual' export STREAM='downstream' export OADP_VERSION="1.1.0" export IIB_IMAGE='iib:289368' bash oadp/deploy_oadp.sh
Actual results:
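The catalogsource pod (prestage-operators) never becomes Ready: its liveness and readiness probes fail and the registry-server container restarts repeatedly.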
Expected results:
Additional info:
Tried creating a test pod from the same index_image with the probes' periodSeconds set to 20 (instead of 10, as in the catalogsource pod); this test pod started successfully:
[root@rdr-sg-e2e-3550-tok04-bastion-0 oadp-qe-automation]# oc get pod/test -oyaml apiVersion: v1 kind: Pod metadata: annotations: k8s.v1.cni.cncf.io/network-status: |- [{ "name": "openshift-sdn", "interface": "eth0", "ips": [ "10.128.2.164" ], "default": true, "dns": {} }] k8s.v1.cni.cncf.io/networks-status: |- [{ "name": "openshift-sdn", "interface": "eth0", "ips": [ "10.128.2.164" ], "default": true, "dns": {} }] openshift.io/scc: anyuid creationTimestamp: "2022-08-10T07:59:03Z" labels: run: test name: test namespace: openshift-adp resourceVersion: "140003544" uid: 152a7070-2852-428c-9779-fa665b6469d8 spec: containers: - image: brew.registry.redhat.io/rh-osbs/iib:289368 imagePullPolicy: IfNotPresent livenessProbe: exec: command: - grpc_health_probe - -addr=:50051 failureThreshold: 3 initialDelaySeconds: 10 periodSeconds: 20 successThreshold: 1 timeoutSeconds: 5 name: test ports: - containerPort: 50051 name: grpc protocol: TCP readinessProbe: exec: command: - grpc_health_probe - -addr=:50051 failureThreshold: 3 initialDelaySeconds: 5 periodSeconds: 20 successThreshold: 1 timeoutSeconds: 5 resources: {} securityContext: capabilities: drop: - MKNOD terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-8vglv readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: default-dockercfg-2mh5d nodeName: tok04-worker-0.rdr-sg-e2e-3550.ibm.com preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: seLinuxOptions: level: s0:c28,c2 serviceAccount: default serviceAccountName: default terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 volumes: - name: kube-api-access-8vglv 
projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2022-08-10T07:59:03Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2022-08-10T08:00:03Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2022-08-10T08:00:03Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2022-08-10T07:59:03Z" status: "True" type: PodScheduled containerStatuses: - containerID: cri-o://53a3ab8973351c0da078703e48214c9f1112da988fe1e854431a9bb222e6ef79 image: brew.registry.redhat.io/rh-osbs/iib:289368 imageID: brew.registry.redhat.io/rh-osbs/iib@sha256:df1c5a1f81f6b47c5c05f0c2a93b8442ed9013bf1458bb7105769145ac113f1a lastState: {} name: test ready: true restartCount: 0 started: true state: running: startedAt: "2022-08-10T07:59:05Z" hostIP: 193.168.200.112 phase: Running podIP: 10.128.2.164 podIPs: - ip: 10.128.2.164 qosClass: BestEffort startTime: "2022-08-10T07:59:03Z"