-
Bug
-
Resolution: Obsolete
-
Critical
-
None
-
4.7
-
Quality / Stability / Reliability
-
False
-
-
None
-
None
-
None
-
None
-
None
-
Rejected
-
None
-
Customer Escalated
-
-
-
None
-
None
-
None
-
None
-
None
-
None
-
None
Description of problem:
When a node stops receiving communication from CRI-O, the endpoints for all of its services are still shown as available.
Version-Release number of selected component (if applicable):
How reproducible:
Easily
Steps to Reproduce:
1. Stop crio on the node and wait for it to go to the unknown state
[root@openshift-jumpserver-0 ~]# ssh core@openshift-worker-3 'sudo systemctl stop crio'
2. Check that the node's container runtime is reported as Unknown
[root@openshift-jumpserver-0 ~]# oc get nodes openshift-worker-3 -o wide
NAME                 STATUS     ROLES                     AGE     VERSION            INTERNAL-IP       EXTERNAL-IP   OS-IMAGE                                                       KERNEL-VERSION                 CONTAINER-RUNTIME
openshift-worker-3   NotReady   worker,workerperf40core   2d18h   v1.20.10+bbbc079   192.168.123.223   <none>        Red Hat Enterprise Linux CoreOS 47.84.202111031903-0 (Ootpa)   4.18.0-305.25.1.el8_4.x86_64   cri-o://Unknown
3. Kill the node
[root@openshift-jumpserver-0 ~]# ssh core@openshift-worker-3
[core@openshift-worker-3 ~]$ sudo su -
[root@openshift-worker-3 ~]# :(){ :|:& };:
[1] 81471
[root@openshift-jumpserver-0 ~]# ping -c 3 openshift-worker-3
PING openshift-worker-3.example.com (192.168.123.223) 56(84) bytes of data.
From openshift-jumpserver-0 (192.168.123.1) icmp_seq=1 Destination Host Unreachable
From openshift-jumpserver-0 (192.168.123.1) icmp_seq=2 Destination Host Unreachable
From openshift-jumpserver-0 (192.168.123.1) icmp_seq=3 Destination Host Unreachable

--- openshift-worker-3.example.com ping statistics ---
3 packets transmitted, 0 received, +3 errors, 100% packet loss, time 2007ms
4. Look at the endpoints in the openshift-dns namespace: the endpoint for the pod on worker-3 still shows as ready
[root@openshift-jumpserver-0 ~]# oc get endpoints -n openshift-dns -o yaml | yq -e '.items | .[].subsets.[]'
addresses:
  - ip: 172.24.0.60
    nodeName: openshift-master-1
    targetRef:
      kind: Pod
      name: dns-default-ms446
      namespace: openshift-dns
      resourceVersion: "520469"
      uid: ecd0545b-5c3c-4db2-a72f-a94fa886878c
  - ip: 172.25.0.49
    nodeName: openshift-master-2
    targetRef:
      kind: Pod
      name: dns-default-97bch
      namespace: openshift-dns
      resourceVersion: "632694"
      uid: 44586966-b900-47a1-aede-fb80633805bb
  - ip: 172.25.4.8
    nodeName: openshift-worker-2
    targetRef:
      kind: Pod
      name: dns-default-bdt59
      namespace: openshift-dns
      resourceVersion: "2638165"
      uid: ad7f6ac0-b87b-45a8-9465-a27de367e4ab
  - ip: 172.26.0.20
    nodeName: openshift-master-0
    targetRef:
      kind: Pod
      name: dns-default-7j48x
      namespace: openshift-dns
      resourceVersion: "540306"
      uid: 5b86231d-2ffd-4fd0-b3d1-44bb051eb6cf
  - ip: 172.27.0.7
    nodeName: openshift-worker-3
    targetRef:
      kind: Pod
      name: dns-default-6z6z4
      namespace: openshift-dns
      resourceVersion: "2655019"
      uid: 5eae9e06-02c9-40f9-be3b-e209e1ce807e
ports:
  - name: dns
    port: 5353
    protocol: UDP
  - name: metrics
    port: 9154
    protocol: TCP
  - name: dns-tcp
    port: 5353
    protocol: TCP
Actual results:
The endpoint for the DNS pod on worker-3 is still listed as ready (under addresses).
Expected results:
The endpoint for the DNS pod on worker-3 should move to notReadyAddresses.
Additional info:
- is cloned by
-
OCPBUGS-17829 openshift-dns endpoint shows as ready when node stop communicating
-
- Closed
-