-
Bug
-
Resolution: Won't Do
-
Major
-
OADP 1.1.0
-
None
-
Quality / Stability / Reliability
-
False
-
-
False
-
ToDo
-
0
-
Very Likely
-
0
-
None
-
Unset
-
Unknown
-
No
Description of problem:
The operator installation using the automation script from https://gitlab.cee.redhat.com/migrationqe/oadp-qe-automation fails on a ppc64le OCP 4.11 cluster.
The installation fails while creating the catalogsource pod from the index_image obtained from datagrepper:
+ /usr/local/bin/oc wait pods --namespace=openshift-marketplace --selector=olm.catalogSource=prestage-operators --for=condition=Ready --timeout=5m error: timed out waiting for the condition on pods/prestage-operators-vdcpk [root@rdr-sg-e2e-3550-tok04-bastion-0] oc describe pod prestage-operators-vdcpk -n openshift-marketplace . . Events: Type Reason Age From Message ---- ------ ---- ---- ------- Normal Scheduled 48s default-scheduler Successfully assigned openshift-marketplace/prestage-operators-vdcpk to tok04-worker-0.rdr-sg-e2e-3550.ibm.com by tok04-master-1.rdr-sg-e2e-3550.ibm.com Normal AddedInterface 46s multus Add eth0 [10.128.2.135/23] from openshift-sdn Normal Pulling 46s kubelet Pulling image "brew.registry.redhat.io/rh-osbs/iib:289368" Normal Pulled 35s kubelet Successfully pulled image "brew.registry.redhat.io/rh-osbs/iib:289368" in 11.272099099s Normal Created 34s kubelet Created container registry-server Normal Started 34s kubelet Started container registry-server Warning Unhealthy 7s (x3 over 27s) kubelet Readiness probe failed: timeout: failed to connect service ":50051" within 1s Warning Unhealthy 7s (x2 over 17s) kubelet Liveness probe failed: timeout: failed to connect service ":50051" within 1s
Pod details -
[root@rdr-sg-e2e-3550-tok04-bastion-0 oadp-qe-automation]# oc get pods prestage-operators-vdcpk -n openshift-marketplace -oyaml apiVersion: v1 kind: Pod metadata: annotations: cluster-autoscaler.kubernetes.io/safe-to-evict: "true" k8s.v1.cni.cncf.io/network-status: |- [{ "name": "openshift-sdn", "interface": "eth0", "ips": [ "10.128.2.144" ], "default": true, "dns": {} }] k8s.v1.cni.cncf.io/networks-status: |- [{ "name": "openshift-sdn", "interface": "eth0", "ips": [ "10.128.2.144" ], "default": true, "dns": {} }] kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"operators.coreos.com/v1alpha1","kind":"CatalogSource","metadata":{"annotations":{},"name":"prestage-operators","namespace":"openshift-marketplace"},"spec":{"displayName":"Custom Operator Catalog","image":"brew.registry.redhat.io/rh-osbs/iib:289368","publisher":"grpc","sourceType":"grpc"}} openshift.io/scc: anyuid creationTimestamp: "2022-08-10T06:29:05Z" generateName: prestage-operators- labels: olm.catalogSource: prestage-operators olm.pod-spec-hash: 7f58fdb856 name: prestage-operators-vdcpk namespace: openshift-marketplace ownerReferences: - apiVersion: operators.coreos.com/v1alpha1 blockOwnerDeletion: false controller: false kind: CatalogSource name: prestage-operators uid: 8276c1c1-0f81-4243-9eee-062827775f01 resourceVersion: "140053669" uid: 28913c9b-0ecb-49e8-9842-b35a8392fdab spec: containers: - image: brew.registry.redhat.io/rh-osbs/iib:289368 imagePullPolicy: Always livenessProbe: exec: command: - grpc_health_probe - -addr=:50051 failureThreshold: 3 initialDelaySeconds: 10 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 name: registry-server ports: - containerPort: 50051 name: grpc protocol: TCP readinessProbe: exec: command: - grpc_health_probe - -addr=:50051 failureThreshold: 3 initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 5 resources: requests: cpu: 10m memory: 50Mi securityContext: capabilities: drop: - MKNOD readOnlyRootFilesystem: false 
terminationMessagePath: /dev/termination-log terminationMessagePolicy: FallbackToLogsOnError volumeMounts: - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-h96rj readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true nodeName: tok04-worker-0.rdr-sg-e2e-3550.ibm.com nodeSelector: kubernetes.io/os: linux preemptionPolicy: PreemptLowerPriority priority: 0 restartPolicy: Always schedulerName: default-scheduler securityContext: seLinuxOptions: level: s0:c15,c10 serviceAccount: prestage-operators serviceAccountName: prestage-operators terminationGracePeriodSeconds: 30 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: kube-api-access-h96rj projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" message: 'containers with unready status: [registry-server]' reason: ContainersNotReady status: "False" type: Ready - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" message: 'containers with unready status: [registry-server]' reason: ContainersNotReady status: "False" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2022-08-10T06:29:05Z" status: "True" type: PodScheduled containerStatuses: - containerID: cri-o://b24929e8349f61feb8497fd6d8b34a6993636a7f01bb366410adb0365e96af25 image: 
brew.registry.redhat.io/rh-osbs/iib:289368 imageID: brew.registry.redhat.io/rh-osbs/iib@sha256:df1c5a1f81f6b47c5c05f0c2a93b8442ed9013bf1458bb7105769145ac113f1a lastState: terminated: containerID: cri-o://ff1bebc05f76f7be391e786cf9fa7c36c40ac49f6b503dc34c2c481853185673 exitCode: 2 finishedAt: "2022-08-10T08:34:16Z" reason: Error startedAt: "2022-08-10T08:33:39Z" name: registry-server ready: false restartCount: 44 started: true state: running: startedAt: "2022-08-10T08:39:27Z" hostIP: 193.168.200.112 phase: Running podIP: 10.128.2.144 podIPs: - ip: 10.128.2.144 qosClass: Burstable startTime: "2022-08-10T06:29:05Z"
Version-Release number of selected component (if applicable):
Tested for OADP 1.1.0-59 on OCP 4.11 ppc64le
How reproducible:
Steps to Reproduce:
1. Since the bastion node is not connected to the Red Hat VPN, obtained the IIB index_image on a Windows machine that is connected to the Red Hat VPN:
ubuntu@DESKTOP-Q4AHE7B:~$ curl -k https://datagrepper.engineering.redhat.com/raw?topic\=/topic/VirtualTopic.eng.ci.redhat-container-image.index.built\&contains\=oadp-operator-bundle-container-1.1\&rows_per_page\=1 | jq -r '.raw_messages[0].msg.index.index_image' % Total % Received % Xferd Average Speed Time Time Time Current Dload Upload Total Spent Left Speed 100 3388 100 3388 0 0 1906 0 0:00:01 0:00:01 --:--:-- 1905 registry-proxy.engineering.redhat.com/rh-osbs/iib:289368
2. Cloned the automation repository and copied it to the bastion node:
git clone https://gitlab.cee.redhat.com/migrationqe/oadp-qe-automation
3. Executed the deploy_oadp.sh script to install the operator:
cd oadp-qe-automation
export REPOSITORY='prestage'
export IP_APPROVAL='Manual'
export STREAM='downstream'
export OADP_VERSION="1.1.0"
export IIB_IMAGE='iib:289368'
bash oadp/deploy_oadp.sh
Actual results:
The catalogsource pod (prestage-operators) fails its liveness and readiness probes, is restarted repeatedly, and never becomes Ready.
Expected results:
Additional info:
Tried creating a test pod from the same index_image with periodSeconds set to 20 (instead of 10) for both the liveness and readiness probes; this pod started successfully:
[root@rdr-sg-e2e-3550-tok04-bastion-0 oadp-qe-automation]# oc get pod/test -oyaml
apiVersion: v1
kind: Pod
metadata:
annotations:
k8s.v1.cni.cncf.io/network-status: |-
[{
"name": "openshift-sdn",
"interface": "eth0",
"ips": [
"10.128.2.164"
],
"default": true,
"dns": {}
}]
k8s.v1.cni.cncf.io/networks-status: |-
[{
"name": "openshift-sdn",
"interface": "eth0",
"ips": [
"10.128.2.164"
],
"default": true,
"dns": {}
}]
openshift.io/scc: anyuid
creationTimestamp: "2022-08-10T07:59:03Z"
labels:
run: test
name: test
namespace: openshift-adp
resourceVersion: "140003544"
uid: 152a7070-2852-428c-9779-fa665b6469d8
spec:
containers:
- image: brew.registry.redhat.io/rh-osbs/iib:289368
imagePullPolicy: IfNotPresent
livenessProbe:
exec:
command:
- grpc_health_probe
- -addr=:50051
failureThreshold: 3
initialDelaySeconds: 10
periodSeconds: 20
successThreshold: 1
timeoutSeconds: 5
name: test
ports:
- containerPort: 50051
name: grpc
protocol: TCP
readinessProbe:
exec:
command:
- grpc_health_probe
- -addr=:50051
failureThreshold: 3
initialDelaySeconds: 5
periodSeconds: 20
successThreshold: 1
timeoutSeconds: 5
resources: {}
securityContext:
capabilities:
drop:
- MKNOD
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: kube-api-access-8vglv
readOnly: true
dnsPolicy: ClusterFirst
enableServiceLinks: true
imagePullSecrets:
- name: default-dockercfg-2mh5d
nodeName: tok04-worker-0.rdr-sg-e2e-3550.ibm.com
preemptionPolicy: PreemptLowerPriority
priority: 0
restartPolicy: Always
schedulerName: default-scheduler
securityContext:
seLinuxOptions:
level: s0:c28,c2
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 300
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 300
volumes:
- name: kube-api-access-8vglv
projected:
defaultMode: 420
sources:
- serviceAccountToken:
expirationSeconds: 3607
path: token
- configMap:
items:
- key: ca.crt
path: ca.crt
name: kube-root-ca.crt
- downwardAPI:
items:
- fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
path: namespace
- configMap:
items:
- key: service-ca.crt
path: service-ca.crt
name: openshift-service-ca.crt
status:
conditions:
- lastProbeTime: null
lastTransitionTime: "2022-08-10T07:59:03Z"
status: "True"
type: Initialized
- lastProbeTime: null
lastTransitionTime: "2022-08-10T08:00:03Z"
status: "True"
type: Ready
- lastProbeTime: null
lastTransitionTime: "2022-08-10T08:00:03Z"
status: "True"
type: ContainersReady
- lastProbeTime: null
lastTransitionTime: "2022-08-10T07:59:03Z"
status: "True"
type: PodScheduled
containerStatuses:
- containerID: cri-o://53a3ab8973351c0da078703e48214c9f1112da988fe1e854431a9bb222e6ef79
image: brew.registry.redhat.io/rh-osbs/iib:289368
imageID: brew.registry.redhat.io/rh-osbs/iib@sha256:df1c5a1f81f6b47c5c05f0c2a93b8442ed9013bf1458bb7105769145ac113f1a
lastState: {}
name: test
ready: true
restartCount: 0
started: true
state:
running:
startedAt: "2022-08-10T07:59:05Z"
hostIP: 193.168.200.112
phase: Running
podIP: 10.128.2.164
podIPs:
- ip: 10.128.2.164
qosClass: BestEffort
startTime: "2022-08-10T07:59:03Z"