[root@rdr-ah-412-syd05-bastion-0 ~]# oc get pods NAME READY STATUS RESTARTS AGE netobserv-ebpf-agent-47zrn 1/1 Running 0 42m netobserv-ebpf-agent-bnqph 0/1 CrashLoopBackOff 13 (67s ago) 42m netobserv-ebpf-agent-djp4s 1/1 Running 0 42m netobserv-ebpf-agent-f7b4h 0/1 CrashLoopBackOff 13 (68s ago) 42m netobserv-ebpf-agent-rkxfs 1/1 Running 0 42m [root@rdr-ah-412-syd05-bastion-0 ~]# oc get pods NAME READY STATUS RESTARTS AGE netobserv-ebpf-agent-47zrn 1/1 Running 0 42m netobserv-ebpf-agent-bnqph 0/1 CrashLoopBackOff 13 (69s ago) 42m netobserv-ebpf-agent-djp4s 1/1 Running 0 42m netobserv-ebpf-agent-f7b4h 0/1 CrashLoopBackOff 13 (70s ago) 42m netobserv-ebpf-agent-rkxfs 1/1 Running 0 42m [root@rdr-ah-412-syd05-bastion-0 ~]# oc get pods -o node error: unable to match a printer suitable for the output format "node", allowed formats are: custom-columns,custom-columns-file,go-template,go-template-file,json,jsonpath,jsonpath-as-json,jsonpath-file,name,template,templatefile,wide,yaml [root@rdr-ah-412-syd05-bastion-0 ~]# oc adm top node NAME CPU(cores) CPU% MEMORY(bytes) MEMORY% syd05-master-0.rdr-ah-412.ibm.com 1529m 20% 10385Mi 32% syd05-master-1.rdr-ah-412.ibm.com 2117m 28% 17318Mi 54% syd05-master-2.rdr-ah-412.ibm.com 2172m 28% 10753Mi 34% syd05-worker-0.rdr-ah-412.ibm.com 1248m 3% 14342Mi 16% syd05-worker-1.rdr-ah-412.ibm.com 534m 1% 13571Mi 16% [root@rdr-ah-412-syd05-bastion-0 ~]# oc adm top pod NAME CPU(cores) MEMORY(bytes) netobserv-ebpf-agent-47zrn 110m 287Mi netobserv-ebpf-agent-djp4s 309m 300Mi netobserv-ebpf-agent-rkxfs 154m 288Mi [root@rdr-ah-412-syd05-bastion-0 ~]# -- later increased the memory limits to 1000Gi, which seemed enough, and then reverted back to 800Mi: [root@rdr-ah-412-syd05-bastion-0 ~]# oc get pods -o wide NAME READY STATUS RESTARTS AGE IP NODE NOMINATED NODE READINESS GATES netobserv-ebpf-agent-2q56n 1/1 Running 0 96s 193.168.200.56 syd05-master-0.rdr-ah-412.ibm.com netobserv-ebpf-agent-6vl8q 0/1 CrashLoopBackOff 3 (48s ago) 99s 193.168.200.116 
syd05-worker-1.rdr-ah-412.ibm.com netobserv-ebpf-agent-m5xt4 0/1 CrashLoopBackOff 2 (26s ago) 47s 193.168.200.19 syd05-worker-0.rdr-ah-412.ibm.com netobserv-ebpf-agent-pfppx 1/1 Running 0 2m50s 193.168.200.36 syd05-master-1.rdr-ah-412.ibm.com netobserv-ebpf-agent-z4wst 1/1 Running 0 76s 193.168.200.77 syd05-master-2.rdr-ah-412.ibm.com [root@rdr-ah-412-syd05-bastion-0 ~]# Journalctl logs: scope,mems_allowed=15,oom_memcg=/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-pod6b9b7c9e_9112_44cf_abe1_2e9080d33a92.slice,task_memcg=/kubepods.slice/kubepods-burstable.slic> rss:0kB, UID:0 pgtables:86kB oom_score_adj:999 ESCOD: rss pgtables_bytes swapents oom_score_adj name 61 53248 0 -1000 conmon 317 88064 0 999 netobserv-ebpf- ,nodemask=(null),cpuset=crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope,mems_allowed=15,oom_memcg=/kubepods.slice/kubepods-burstable.slice/kubepods-burstable-po> process 877021 (netobserv-ebpf-) total-vm:1178240kB, anon-rss:6784kB, file-rss:13504kB, shmem-rss:0kB, UID:0 pgtables:86kB oom_score_adj:999 etobserv-ebpf-), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB : OOM event received : OOM received : container 877021 exited with status 137 7baa371e7480aab6e40dce04a43b022f8fe35.scope: Succeeded. 
7baa371e7480aab6e40dce04a43b022f8fe35.scope: Consumed 644ms CPU time ESCOD swapcached 0 anon_thp 0 file_thp 0 shmem_thp 0 inactive_anon 9502720 active_anon 65536 inactive_file 0 active_file 65536 unevictable 0 slab_reclaimable 102432 slab_unreclaimable 488616 slab 591048 workingset_refault_anon 0 workingset_refault_file 0 workingset_activate_anon 0 workingset_activate_file 0 workingset_restore_anon 0 workingset_restore_file 0 workingset_nodereclaim 0 pgfault 1306 pgmajfault 0 pgrefill 8 pgscan 9 pgsteal 0 pgactivate 9 pgdeactivate 8 pglazyfree 0 pglazyfreed 0 thp_fault_alloc 0 thp_collapse_alloc 0 Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: Tasks state (memory values in pages): Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ 876992] 0 876992 1492 61 53248 0 -1000 conmon Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ 877021] 0 877021 18410 317 88064 0 999 netobserv-ebpf- Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: Memory cgroup out of memory: Killed process 877021 (netobserv-ebpf-) total-vm:1178240kB, anon-rss:6784kB, file-rss:13504kB, shmem-> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: oom_reaper: reaped process 877021 (netobserv-ebpf-), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : OOM event received Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : OOM received Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : container 877021 exited with status 137 Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com 
systemd[1]: crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Succeeded. Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Consumed 644ms CPU time ESCOD swapcached 0 anon_thp 0 file_thp 0 shmem_thp 0 inactive_anon 9502720 active_anon 65536 inactive_file 0 active_file 65536 unevictable 0 slab_reclaimable 102432 slab_unreclaimable 488616 slab 591048 workingset_refault_anon 0 workingset_refault_file 0 workingset_activate_anon 0 workingset_activate_file 0 workingset_restore_anon 0 workingset_restore_file 0 workingset_nodereclaim 0 inactive_file 0 active_file 65536 unevictable 0 pgfault 1306 pgmajfault 0 pgrefill 8 pgscan 9 pgsteal 0 pgactivate 9 pgdeactivate 8 pglazyfree 0 pglazyfreed 0 thp_fault_alloc 0 thp_collapse_alloc 0 Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: Tasks state (memory values in pages): Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ 876992] 0 876992 1492 61 53248 0 -1000 conmon Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ 877021] 0 877021 18410 317 88064 0 999 netobserv-ebpf- Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: Memory cgroup out of memory: Killed process 877021 (netobserv-ebpf-) total-vm:1178240kB, anon-rss:6784kB, file-rss:13504kB, shmem-> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: oom_reaper: reaped process 877021 (netobserv-ebpf-), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : OOM event received Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com 
conmon[876992]: conmon 2d792400af8847fd4713 : OOM received Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : container 877021 exited with status 137 Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Succeeded. Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Consumed 644ms CPU time Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-conmon-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Succeeded. Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-conmon-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Consumed 29ms CPU time Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:15.448527 3379 kubelet.go:2157] "SyncLoop (PLEG): event for pod" pod="e2e-test-netobserv-6sstp-privile> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:15.449535 3379 kubelet.go:2157] "SyncLoop (PLEG): event for pod" pod="e2e-test-netobserv-6sstp/flowlog> Jun 20 07:32:16 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:16.453746 3379 generic.go:296] "Generic (PLEG): container finished" podID=6b9b7c9e-9112-44cf-abe1-2e90> Jun 20 07:32:16 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:16.453815 3379 kubelet.go:2157] "SyncLoop (PLEG): event for pod" pod="e2e-test-netobserv-6sstp-privile> Jun 20 07:32:16 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:16.454121 3379 scope.go:115] "RemoveContainer" containerID="2d792400af8847fd4713983f1437baa371e7480aab> Jun 20 07:32:16 syd05-worker-1.rdr-ah-412.ibm.com crio[3334]: time="2023-06-20 07:32:16.454644100Z" level=info msg="Checking image status: registry.redhat.io/network-observability/network- ESCOD inactive_file 0 active_file 65536 
unevictable 0 slab_reclaimable 102432 slab_unreclaimable 488616 slab 591048 workingset_refault_anon 0 workingset_refault_file 0 workingset_activate_anon 0 workingset_activate_file 0 workingset_restore_anon 0 workingset_restore_file 0 workingset_nodereclaim 0 pgfault 1306 pgmajfault 0 pgrefill 8 pgscan 9 pgsteal 0 pgactivate 9 pgdeactivate 8 pglazyfree 0 pglazyfreed 0 thp_fault_alloc 0 thp_collapse_alloc 0 Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: Tasks state (memory values in pages): Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ pid ] uid tgid total_vm rss pgtables_bytes swapents oom_score_adj name Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ 876992] 0 876992 1492 61 53248 0 -1000 conmon Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: [ 877021] 0 877021 18410 317 88064 0 999 netobserv-ebpf- Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: oom-kill:constraint=CONSTRAINT_MEMCG,nodemask=(null),cpuset=crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: Memory cgroup out of memory: Killed process 877021 (netobserv-ebpf-) total-vm:1178240kB, anon-rss:6784kB, file-rss:13504kB, shmem-> Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com kernel: oom_reaper: reaped process 877021 (netobserv-ebpf-), now anon-rss:0kB, file-rss:0kB, shmem-rss:0kB Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : OOM event received Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : OOM received Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com conmon[876992]: conmon 2d792400af8847fd4713 : container 877021 exited with status 137 Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Succeeded. 
Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Consumed 644ms CPU time Jun 20 07:32:15 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: crio-conmon-2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43b022f8fe35.scope: Succeeded. Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:18.461283 3379 generic.go:296] "Generic (PLEG): container finished" podID=6b9b7c9e-9112-44cf-abe1-2e90 0d33a92 containerID="42da8624bca20f73fcb43e3dde876f8f92e903a19f25879386594907088a4e64" exitCode=137 Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:18.461327 3379 kubelet.go:2157] "SyncLoop (PLEG): event for pod" pod="e2e-test-netobserv-6sstp-privile ed/netobserv-ebpf-agent-db57v" event=&{ID:6b9b7c9e-9112-44cf-abe1-2e9080d33a92 Type:ContainerDied Data:42da8624bca20f73fcb43e3dde876f8f92e903a19f25879386594907088a4e64} Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:18.461366 3379 scope.go:115] "RemoveContainer" containerID="2d792400af8847fd4713983f1437baa371e7480aab e40dce04a43b022f8fe35" Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:18.461704 3379 scope.go:115] "RemoveContainer" containerID="42da8624bca20f73fcb43e3dde876f8f92e903a19f 5879386594907088a4e64" Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: E0620 07:32:18.462177 3379 pod_workers.go:965] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"netobserv-ebpf-agent\" with CrashLoopBackOff: \"back-off 10s restarting failed container=netobserv-ebpf-agent pod=netobserv-ebpf-agent-db57v_e2e-test-netobserv-6sstp-privileged(6b9b7c9e- 112-44cf-abe1-2e9080d33a92)\"" pod="e2e-test-netobserv-6sstp-privileged/netobserv-ebpf-agent-db57v" podUID=6b9b7c9e-9112-44cf-abe1-2e9080d33a92 Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com crio[3334]: time="2023-06-20 07:32:18.462196598Z" 
level=info msg="Removing container: 2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a 3b022f8fe35" id=65855c80-841e-4620-bb18-be964742f527 name=/runtime.v1.RuntimeService/RemoveContainer Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: var-lib-containers-storage-overlay-fa00202cf1c2857db293d47a5e18e1bcb930951a36b23ffcaac7082cc79f2855-merged.mount: Succeeded. Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com crio[3334]: time="2023-06-20 07:32:18.584771532Z" level=info msg="Removed container 2d792400af8847fd4713983f1437baa371e7480aab6e40dce04a43 022f8fe35: e2e-test-netobserv-6sstp-privileged/netobserv-ebpf-agent-db57v/netobserv-ebpf-agent" id=65855c80-841e-4620-bb18-be964742f527 name=/runtime.v1.RuntimeService/RemoveContainer Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: run-runc-bd86d7da00c784c9a97bbce9d9c6101d1ee53c597bffd136cefb5a488c749043-runc.QDWFsA.mount: Succeeded. Jun 20 07:32:18 syd05-worker-1.rdr-ah-412.ibm.com systemd[1]: run-runc-bd86d7da00c784c9a97bbce9d9c6101d1ee53c597bffd136cefb5a488c749043-runc.I3cveb.mount: Succeeded. Jun 20 07:32:19 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: I0620 07:32:19.465158 3379 scope.go:115] "RemoveContainer" containerID="42da8624bca20f73fcb43e3dde876f8f92e903a19f 5879386594907088a4e64" Jun 20 07:32:19 syd05-worker-1.rdr-ah-412.ibm.com kubenswrapper[3379]: E0620 07:32:19.465484 3379 pod_workers.go:965] "Error syncing pod, skipping" err="failed to \"StartContainer\" for \"netobserv-ebpf-agent\" with CrashLoopBackOff: \"back-off 10s restarting failed container=netobserv-ebpf-agent pod=netobserv-ebpf-agent-db57v_e2e-test-netobserv-6sstp-privileged(6b9b7c9e- 112-44cf-abe1-2e9080d33a92)\"" pod="e2e-test-netobserv-6sstp-privileged/netobserv-ebpf-agent-db57v" podUID=6b9b7c9e-9112-44cf-abe1-2e9080d33a92