-
Bug
-
Resolution: Done
-
High
-
OCP 4.13, OCP 4.14
-
None
-
False
-
None
-
False
-
-
Bug Fix
-
Done
-
-
-
Kata Sprint #245
-
0
-
0
Description
Start a specific pod on OCP 4.13.0-rc.5 (based on RHCOS 9); it stays in ContainerCreating seemingly indefinitely. Other pods have worked; this pod worked on 4.12 and on earlier builds of OCP 4.13 that were based on RHCOS 8 (4.13.0-ec).
Note that RHEL/RHCOS 9 detects disks asynchronously, so the /dev/sdX names cannot be relied on; the stable names in e.g. /dev/disk/by-id must be used instead.
Steps to reproduce
<What actions did you take to hit the bug?>
1. Node has at least two disks with LSO/ODF installed.
2. oc apply the attached pod
Expected result
Pod should start and run correctly.
Actual result
Pod is created but never transitions from ContainerCreating to Running.
Impact
Unable to run the pod in question.
Env
OCP 4.13.0-rc.5, kata-containers-3.0.2-5.el9.x86_64, 3 masters+3 workers in IBM cloud (x86) with two attached disks on worker, ODF operator 4.12.2-rhodf with LSO and/or ODF in use. Fails with both Kata operator 1.3 and 1.4.
Additional helpful info
Have must-gather, but too large to attach.
Pod:
[root@ebattat-perf-ci-server0 ~]# oc get pod -n benchmark-runner
NAME READY STATUS RESTARTS AGE
vdbench-kata-2ac4cc43 0/1 ContainerCreating 0 3h55m
[root@ebattat-perf-ci-server0 ~]# oc get pod -n benchmark-runner -oyaml
apiVersion: v1
items:
- apiVersion: v1
kind: Pod
metadata:
annotations:
io.katacontainers.config.hypervisor.virtio_fs_extra_args: '["-o","allow_direct_io","--thread-pool-size=16"]'
k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.128.3.22/23"],"mac_address":"0a:58:0a:80:03:16","gateway_ips":["10.128.2.1"],"ip_address":"10.128.3.22/23","gateway_ip":"10.128.2.1"}}'
k8s.v1.cni.cncf.io/network-status: |-
[{
"name": "ovn-kubernetes",
"interface": "eth0",
"ips": [
"10.128.3.22"
],
"mac": "0a:58:0a:80:03:16",
"default": true,
"dns": {}
}]
openshift.io/scc: privileged
creationTimestamp: "2023-04-27T15:02:42Z"
labels:
app: vdbench-2ac4cc43
benchmark-runner-workload: vdbench
benchmark-uuid: 2ac4cc43-3aaa-4dd6-8599-b387ee14005b
type: vdbench-kata-2ac4cc43
name: vdbench-kata-2ac4cc43
namespace: benchmark-runner
resourceVersion: "8417690"
uid: 471beea6-30ae-4533-835d-08f6a2970d3b
spec:
containers:
- args:
- -c
- $WORKLOAD_METHOD
command:
- /bin/bash
env:
- name: BLOCK_SIZES
value: 64,oltp1
- name: IO_OPERATION
value: write,oltp1
- name: IO_THREADS
value: 16,3
- name: FILES_IO
value: random,oltp1
- name: IO_RATE
value: max,max
- name: MIX_PRECENTAGE
- name: DURATION
value: "20"
- name: PAUSE
value: "0"
- name: WARMUP
value: "20"
- name: FILES_SELECTION
value: random
- name: COMPRESSION_RATIO
value: "2"
- name: RUN_FILLUP
value: "yes"
- name: LOGS_DIR
value: /workload/
- name: DIRECTORIES
value: "100"
- name: FILES_PER_DIRECTORY
value: "10"
- name: SIZE_PER_FILE
value: "5"
- name: REDIS_HOST
value: redis-deployment.benchmark-runner.svc.cluster.local
- name: WORKLOAD_METHOD
value: /vdbench/vdbench_runner.sh
- name: TIMEOUT
value: "3600"
image: quay.io/ebattat/centos-stream8-vdbench5.04.07-pod:v1.0.13
imagePullPolicy: IfNotPresent
name: vdbench-pod
resources:
limits:
cpu: "2"
requests:
cpu: 10m
memory: 4Gi
securityContext:
capabilities:
drop:
- MKNOD
terminationMessagePath: /dev/termination-log
terminationMessagePolicy: File
volumeMounts:
- mountPath: /workload
name: vdbench-pod-pvc-claim
- mountPath: /var/run/secrets/kubernetes.io/serviceaccount
name: kube-api-access-f5thb
readOnly: true
dnsPolicy: ClusterFirst
enableServiceLinks: true
imagePullSecrets:
- name: default-dockercfg-92zhb
nodeName: worker-1
nodeSelector:
kubernetes.io/hostname: worker-1
node-role.kubernetes.io/kata-oc: ""
overhead:
cpu: 250m
memory: 350Mi
preemptionPolicy: PreemptLowerPriority
priority: 0
restartPolicy: Never
runtimeClassName: kata
schedulerName: default-scheduler
securityContext:
seLinuxOptions:
level: s0:c47,c9
serviceAccount: default
serviceAccountName: default
terminationGracePeriodSeconds: 30
tolerations:
- effect: NoExecute
key: node.kubernetes.io/not-ready
operator: Exists
tolerationSeconds: 300
- effect: NoExecute
key: node.kubernetes.io/unreachable
operator: Exists
tolerationSeconds: 300
- effect: NoSchedule
key: node.kubernetes.io/memory-pressure
operator: Exists
volumes:
- name: vdbench-pod-pvc-claim
persistentVolumeClaim:
claimName: vdbench-pod-pvc-claim
- name: kube-api-access-f5thb
projected:
defaultMode: 420
sources:
- serviceAccountToken:
expirationSeconds: 3607
path: token
- configMap:
items:
- key: ca.crt
path: ca.crt
name: kube-root-ca.crt
- downwardAPI:
items:
- fieldRef:
apiVersion: v1
fieldPath: metadata.namespace
path: namespace
- configMap:
items:
- key: service-ca.crt
path: service-ca.crt
name: openshift-service-ca.crt
status:
conditions:
- lastProbeTime: null
lastTransitionTime: "2023-04-27T15:02:42Z"
status: "True"
type: Initialized
- lastProbeTime: null
lastTransitionTime: "2023-04-27T15:02:42Z"
message: 'containers with unready status: [vdbench-pod]'
reason: ContainersNotReady
status: "False"
type: Ready
- lastProbeTime: null
lastTransitionTime: "2023-04-27T15:02:42Z"
message: 'containers with unready status: [vdbench-pod]'
reason: ContainersNotReady
status: "False"
type: ContainersReady
- lastProbeTime: null
lastTransitionTime: "2023-04-27T15:02:42Z"
status: "True"
type: PodScheduled
containerStatuses:
- image: quay.io/ebattat/centos-stream8-vdbench5.04.07-pod:v1.0.13
imageID: ""
lastState: {}
name: vdbench-pod
ready: false
restartCount: 0
started: false
state:
waiting:
reason: ContainerCreating
hostIP: 10.36.200.199
phase: Pending
qosClass: Burstable
startTime: "2023-04-27T15:02:42Z"
kind: List
metadata:
resourceVersion: ""