[root@cert-rhosp-02 ~]# oc project openshift-workload-availability Now using project "openshift-workload-availability" on server "https://api.ocp-edge-cluster-0.qe.lab.redhat.com:6443". [root@cert-rhosp-02 ~]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.20.0-0.nightly-2025-11-14-130157 True False 29h Cluster version is 4.20.0-0.nightly-2025-11-14-130157 [root@cert-rhosp-02 ~]# oc get csv NAME DISPLAY VERSION REPLACES PHASE machine-deletion-remediation.v0.5.0 Machine Deletion Remediation operator 0.5.0 machine-deletion-remediation.v0.4.1 Succeeded node-healthcheck-operator.v0.10.1 Node Health Check Operator 0.10.1 node-healthcheck-operator.v0.10.0 Succeeded self-node-remediation.v0.11.0 Self Node Remediation Operator 0.11.0 self-node-remediation.v0.10.1 Succeeded [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep node-healthcheck-controller-manager) [root@cert-rhosp-02 ~]# echo $PODS pod/node-healthcheck-controller-manager-79965596bb-7pdwc pod/node-healthcheck-controller-manager-79965596bb-bbnk6 [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/node-healthcheck-controller-manager-79965596bb-7pdwc "master-0-2" == pod/node-healthcheck-controller-manager-79965596bb-bbnk6 "master-0-1" [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep self-node-remediation-controller-manager) [root@cert-rhosp-02 ~]# echo $PODS pod/self-node-remediation-controller-manager-77c688fb7b-h7hjd pod/self-node-remediation-controller-manager-77c688fb7b-s8rvc [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/self-node-remediation-controller-manager-77c688fb7b-h7hjd "worker-0-2" == pod/self-node-remediation-controller-manager-77c688fb7b-s8rvc "worker-0-1" [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep machine-deletion-remediation-controller) [root@cert-rhosp-02 ~]# echo $PODS pod/machine-deletion-remediation-controller-manager-b468d785d-6kql6 [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/machine-deletion-remediation-controller-manager-b468d785d-6kql6 "worker-0-1" [root@cert-rhosp-02 ~]# vi test.yaml [root@cert-rhosp-02 ~]# cat test.yaml --- kind: MachineDeletionRemediationTemplate apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 metadata: name: mdr-template namespace: openshift-workload-availability spec: template: spec: {} --- apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-workload-availability name: selfnoderemediationtemplate-sample spec: template: spec: remediationStrategy: Automatic --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-mdr-snr spec: minHealthy: 30% healthyDelay: 5s escalatingRemediations: - remediationTemplate: apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 kind: MachineDeletionRemediationTemplate name: mdr-template namespace: openshift-workload-availability order: 0 timeout: 60s - remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability order: -1 timeout: 500s selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - type: Ready status: "False" duration: 30s - type: Ready status: Unknown duration: 30s [root@cert-rhosp-02 ~]# oc apply -f test.yaml machinedeletionremediationtemplate.machine-deletion-remediation.medik8s.io/mdr-template unchanged selfnoderemediationtemplate.self-node-remediation.medik8s.io/selfnoderemediationtemplate-sample unchanged nodehealthcheck.remediation.medik8s.io/nhc-mdr-snr configured [root@cert-rhosp-02 ~]# oc get snr No resources found in openshift-workload-availability namespace. [root@cert-rhosp-02 ~]# oc get mdr No resources found in openshift-workload-availability namespace. [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 43h v1.33.5 worker-0-1 Ready worker 43h v1.33.5 worker-0-2 Ready worker 43h v1.33.5 [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 NotReady worker 43h v1.33.5 worker-0-1 Ready worker 43h v1.33.5 worker-0-2 Ready worker 43h v1.33.5 [root@cert-rhosp-02 ~]# cat test.yaml --- kind: MachineDeletionRemediationTemplate apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 metadata: name: mdr-template namespace: openshift-workload-availability spec: template: spec: {} --- apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-workload-availability name: selfnoderemediationtemplate-sample spec: template: spec: remediationStrategy: Automatic --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-mdr-snr spec: minHealthy: 30% healthyDelay: 5s escalatingRemediations: - remediationTemplate: apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 kind: MachineDeletionRemediationTemplate name: mdr-template namespace: openshift-workload-availability order: -1 timeout: 60s - remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability order: 0 timeout: 500s selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - type: Ready status: "False" duration: 30s - type: Ready status: Unknown duration: 30s [root@cert-rhosp-02 ~]# oc apply -f test.yaml machinedeletionremediationtemplate.machine-deletion-remediation.medik8s.io/mdr-template unchanged selfnoderemediationtemplate.self-node-remediation.medik8s.io/selfnoderemediationtemplate-sample unchanged nodehealthcheck.remediation.medik8s.io/nhc-mdr-snr configured [root@cert-rhosp-02 ~]# oc get nhc -o json | jq .items[0].status { "conditions": [ { "lastTransitionTime": "2025-11-16T06:25:20Z", "message": "No issues found, NodeHealthCheck is enabled.", "reason": "NodeHealthCheckEnabled", "status": "False", "type": "Disabled" } ], "healthyNodes": 2, "lastUpdateTime": "2025-11-16T19:55:42Z", "observedNodes": 3, "phase": "Enabled", "reason": "NHC is enabled, no ongoing remediation" } [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 NotReady worker 43h v1.33.5 worker-0-1 Ready worker 43h v1.33.5 worker-0-2 Ready worker 43h v1.33.5 [root@cert-rhosp-02 ~]# oc get nhc -o json | jq .items[0].status { "conditions": [ { "lastTransitionTime": "2025-11-16T06:25:20Z", "message": "No issues found, NodeHealthCheck is enabled.", "reason": "NodeHealthCheckEnabled", "status": "False", "type": "Disabled" } ], "healthyNodes": 2, "lastUpdateTime": "2025-11-16T19:56:12Z", "observedNodes": 3, "phase": "Remediating", "reason": "NHC is remediating 1 nodes", "unhealthyNodes": [ { "name": "worker-0-0", "remediations": [ { "resource": { "apiVersion": "machine-deletion-remediation.medik8s.io/v1alpha1", "kind": "MachineDeletionRemediation", "name": "worker-0-0", "namespace": "openshift-workload-availability", "uid": "eddd81b0-3ac3-4414-ad83-cd7a57fde0ee" }, "started": "2025-11-16T19:56:12Z", "templateName": "mdr-template" } ] } ] } [root@cert-rhosp-02 ~]# oc get mdr -o yaml apiVersion: v1 items: - apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 kind: MachineDeletionRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-0 remediation.medik8s.io/template-name: mdr-template creationTimestamp: "2025-11-16T19:56:12Z" generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-0 namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-mdr-snr uid: 5cc37621-86e0-4a9a-8751-641e5567058f resourceVersion: "761431" uid: eddd81b0-3ac3-4414-ad83-cd7a57fde0ee spec: {} status: conditions: - lastTransitionTime: "2025-11-16T19:56:12Z" message: "" reason: RemediationStarted status: "True" type: Processing - lastTransitionTime: "2025-11-16T19:56:12Z" message: "" reason: RemediationStarted status: Unknown type: Succeeded - lastTransitionTime: "2025-11-16T19:56:12Z" message: 'Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name' reason: MachineDeletionOnBareMetalProviderKeepsNodeName status: "False" type: PermanentNodeDeletionExpected kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc get nhc -o json | jq .items[0].status { "conditions": [ { "lastTransitionTime": "2025-11-16T06:25:20Z", "message": "No issues found, NodeHealthCheck is enabled.", "reason": "NodeHealthCheckEnabled", "status": "False", "type": "Disabled" } ], "healthyNodes": 2, "lastUpdateTime": "2025-11-16T19:57:12Z", "observedNodes": 3, "phase": "Remediating", "reason": "NHC is remediating 1 nodes", "unhealthyNodes": [ { "name": "worker-0-0", "remediations": [ { "resource": { "apiVersion": "machine-deletion-remediation.medik8s.io/v1alpha1", "kind": "MachineDeletionRemediation", "name": "worker-0-0", "namespace": "openshift-workload-availability", "uid": "eddd81b0-3ac3-4414-ad83-cd7a57fde0ee" }, "started": "2025-11-16T19:56:12Z", "templateName": "mdr-template", "timedOut": "2025-11-16T19:57:12Z" }, { "resource": { "apiVersion": "self-node-remediation.medik8s.io/v1alpha1", "kind": "SelfNodeRemediation", "name": "worker-0-0-7vjqc", "namespace": "openshift-workload-availability", "uid": "f6e1f328-d33d-495c-8a88-621db55bc7e9" }, "started": "2025-11-16T19:57:12Z", "templateName": "selfnoderemediationtemplate-sample" } ] } ] } [root@cert-rhosp-02 ~]# oc get snr -o yaml apiVersion: v1 items: - apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-0 remediation.medik8s.io/template-name: selfnoderemediationtemplate-sample creationTimestamp: "2025-11-16T19:57:12Z" finalizers: - self-node-remediation.medik8s.io/snr-finalizer generateName: worker-0-0- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-0-7vjqc namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-mdr-snr uid: 5cc37621-86e0-4a9a-8751-641e5567058f resourceVersion: "761815" uid: f6e1f328-d33d-495c-8a88-621db55bc7e9 spec: remediationStrategy: Automatic status: conditions: - lastTransitionTime: "2025-11-16T19:57:12Z" message: "" reason: RemediationStarted status: "True" type: Processing - lastTransitionTime: "2025-11-16T19:57:12Z" message: "" reason: RemediationStarted status: Unknown type: Succeeded phase: Pre-Reboot-Completed timeAssumedRebooted: "2025-11-16T19:59:13Z" kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 NotReady,SchedulingDisabled worker 43h v1.33.5 worker-0-1 Ready worker 43h v1.33.5 worker-0-2 Ready worker 43h v1.33.5 [root@cert-rhosp-02 ~]# oc get snr NAME AGE worker-0-0-7vjqc 10m [root@cert-rhosp-02 ~]# oc get mdr NAME AGE worker-0-0 11m [root@cert-rhosp-02 ~]# oc get mdr NAME AGE worker-0-0 23m [root@cert-rhosp-02 ~]# oc get snr NAME AGE worker-0-0-7vjqc 22m [root@cert-rhosp-02 ~]# oc get mdr -o yaml apiVersion: v1 items: - apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 kind: MachineDeletionRemediation metadata: annotations: remediation.medik8s.io/nhc-timed-out: "2025-11-16T19:57:12Z" remediation.medik8s.io/node-name: worker-0-0 remediation.medik8s.io/template-name: mdr-template creationTimestamp: "2025-11-16T19:56:12Z" generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-0 namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-mdr-snr uid: 5cc37621-86e0-4a9a-8751-641e5567058f resourceVersion: "761773" uid: eddd81b0-3ac3-4414-ad83-cd7a57fde0ee spec: {} status: conditions: - lastTransitionTime: "2025-11-16T19:57:12Z" message: "" reason: RemediationStoppedByNHC status: "False" type: Processing - lastTransitionTime: "2025-11-16T19:57:12Z" message: "" reason: RemediationStoppedByNHC status: "False" type: Succeeded - lastTransitionTime: "2025-11-16T19:56:12Z" message: 'Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name' reason: MachineDeletionOnBareMetalProviderKeepsNodeName status: "False" type: PermanentNodeDeletionExpected kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc get snr -o yaml apiVersion: v1 items: - apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-0 remediation.medik8s.io/template-name: selfnoderemediationtemplate-sample creationTimestamp: "2025-11-16T19:57:12Z" finalizers: - self-node-remediation.medik8s.io/snr-finalizer generateName: worker-0-0- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-0-7vjqc namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-mdr-snr uid: 5cc37621-86e0-4a9a-8751-641e5567058f resourceVersion: "762515" uid: f6e1f328-d33d-495c-8a88-621db55bc7e9 spec: remediationStrategy: Automatic status: conditions: - lastTransitionTime: "2025-11-16T19:59:24Z" message: "" reason: RemediationFinishedSuccessfully status: "False" type: Processing - lastTransitionTime: "2025-11-16T19:59:24Z" message: "" reason: RemediationFinishedSuccessfully status: "True" type: Succeeded phase: Fencing-Completed timeAssumedRebooted: "2025-11-16T19:59:13Z" kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc get nhc -o yaml apiVersion: v1 items: - apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","metadata":{"annotations":{},"name":"nhc-mdr-snr"},"spec":{"escalatingRemediations":[{"order":-1,"remediationTemplate":{"apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1","kind":"MachineDeletionRemediationTemplate","name":"mdr-template","namespace":"openshift-workload-availability"},"timeout":"60s"},{"order":0,"remediationTemplate":{"apiVersion":"self-node-remediation.medik8s.io/v1alpha1","kind":"SelfNodeRemediationTemplate","name":"selfnoderemediationtemplate-sample","namespace":"openshift-workload-availability"},"timeout":"500s"}],"healthyDelay":"5s","minHealthy":"30%","selector":{"matchExpressions":[{"key":"node-role.kubernetes.io/worker","operator":"Exists"}]},"unhealthyConditions":[{"duration":"30s","status":"False","type":"Ready"},{"duration":"30s","status":"Unknown","type":"Ready"}]}} creationTimestamp: "2025-11-16T06:25:20Z" generation: 3 name: nhc-mdr-snr resourceVersion: "764528" uid: 5cc37621-86e0-4a9a-8751-641e5567058f spec: escalatingRemediations: - order: -1 remediationTemplate: apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 kind: MachineDeletionRemediationTemplate name: mdr-template namespace: openshift-workload-availability timeout: 60s - order: 0 remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability timeout: 500s healthyDelay: 5s minHealthy: 30% selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - duration: 30s status: "False" type: Ready - duration: 30s status: Unknown type: Ready status: conditions: - lastTransitionTime: "2025-11-16T06:25:20Z" message: No issues found, NodeHealthCheck is enabled. reason: NodeHealthCheckEnabled status: "False" type: Disabled healthyNodes: 2 lastUpdateTime: "2025-11-16T20:05:32Z" observedNodes: 3 phase: Remediating reason: NHC is remediating 1 nodes unhealthyNodes: - name: worker-0-0 remediations: - resource: apiVersion: machine-deletion-remediation.medik8s.io/v1alpha1 kind: MachineDeletionRemediation name: worker-0-0 namespace: openshift-workload-availability uid: eddd81b0-3ac3-4414-ad83-cd7a57fde0ee started: "2025-11-16T19:56:12Z" templateName: mdr-template timedOut: "2025-11-16T19:57:12Z" - resource: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation name: worker-0-0-7vjqc namespace: openshift-workload-availability uid: f6e1f328-d33d-495c-8a88-621db55bc7e9 started: "2025-11-16T19:57:12Z" templateName: selfnoderemediationtemplate-sample timedOut: "2025-11-16T20:05:32Z" kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc delete snr error: resource(s) were provided, but no name was specified [root@cert-rhosp-02 ~]# oc delete snr worker-0-0-7vjqc selfnoderemediation.self-node-remediation.medik8s.io "worker-0-0-7vjqc" deleted [root@cert-rhosp-02 ~]# oc delete mdr worker-0-0 machinedeletionremediation.machine-deletion-remediation.medik8s.io "worker-0-0" deleted [root@cert-rhosp-02 ~]# oc delete nhc nhc-mdr-snr Error from server (Forbidden): admission webhook "vnodehealthcheck.kb.io" denied the request: deletion prohibited due to running remediation [root@cert-rhosp-02 ~]# MDR logs: [root@cert-rhosp-02 ~]# oc logs pod/machine-deletion-remediation-controller-manager-b468d785d-6kql6 -c manager 2025-11-16T19:46:21.378258233Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-11-16T19:46:21.378359977Z INFO setup Go OS/Arch: linux/amd64 2025-11-16T19:46:21.37836307Z INFO setup Operator Version: 2c7f718 2025-11-16T19:46:21.378365111Z INFO setup Git Commit: 2c7f718ad98018b682353cefdf55662191b8fe8d 2025-11-16T19:46:21.378366924Z INFO setup Build Date: 2025-11-11T13:09:46+00:00 2025-11-16T19:46:21.473267688Z INFO setup starting manager 2025-11-16T19:46:21.473461794Z INFO controller-runtime.metrics Starting metrics server 2025-11-16T19:46:21.4735023Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-11-16T19:46:21.473542669Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} I1116 19:46:21.473602 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/285d4098.example.com... I1116 19:46:38.613182 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/285d4098.example.com 2025-11-16T19:46:38.613252955Z DEBUG events machine-deletion-remediation-controller-manager-b468d785d-6kql6_56410e6a-40ea-4c32-a8f8-678d83684b3f became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"285d4098.example.com","uid":"870b4024-45e4-48e0-b964-5b6156cbb17f","apiVersion":"coordination.k8s.io/v1","resourceVersion":"756449"}, "reason": "LeaderElection"} 2025-11-16T19:46:38.613483781Z INFO Starting EventSource {"controller": "machinedeletionremediation", "controllerGroup": "machine-deletion-remediation.medik8s.io", "controllerKind": "MachineDeletionRemediation", "source": "kind source: *v1alpha1.MachineDeletionRemediation"} 2025-11-16T19:46:38.613519726Z INFO Starting Controller {"controller": "machinedeletionremediation", "controllerGroup": "machine-deletion-remediation.medik8s.io", "controllerKind": "MachineDeletionRemediation"} 2025-11-16T19:46:38.715485582Z INFO Starting workers {"controller": "machinedeletionremediation", "controllerGroup": "machine-deletion-remediation.medik8s.io", "controllerKind": "MachineDeletionRemediation", "worker count": 1} 2025-11-16T19:56:12.547414065Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:56:12.547499088Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:56:12.547506812Z INFO controllers.MachineDeletionRemediation updating Status Condition {"processingConditionStatus": "True", "succededConditionStatus": "Unknown", "reason": "RemediationStarted"} 2025-11-16T19:56:12.551429238Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:56:12.55147032Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:56:12.653724738Z INFO controllers.MachineDeletionRemediation Looking for the target Machine {"machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "namespace": "openshift-machine-api"} 2025-11-16T19:56:12.755689563Z INFO controllers.MachineDeletionRemediation target machine found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr"} 2025-11-16T19:56:12.755821759Z INFO controllers.MachineDeletionRemediation updating Status Condition {"PermanentNodeDeletionExpected": "False", "reason": "MachineDeletionOnBareMetalProviderKeepsNodeName", "message": "Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name"} 2025-11-16T19:56:12.755877642Z INFO controllers.MachineDeletionRemediation Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:56:12.756144718Z DEBUG events [remediation] Machine will be deleted and the unhealthy node replaced. This is a BareMetal cluster provider: the new node is NOT expected to have a new name {"type": "Normal", "object": {"kind":"MachineDeletionRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee","apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1","resourceVersion":"761428"}, "reason": "PermanentNodeDeletionExpected"} 2025-11-16T19:56:12.762230459Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:56:12.7622538Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:56:12.762290961Z INFO controllers.MachineDeletionRemediation Looking for the target Machine {"machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "namespace": "openshift-machine-api"} 2025-11-16T19:56:12.76231108Z INFO controllers.MachineDeletionRemediation target machine found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr"} 2025-11-16T19:56:12.762314197Z INFO controllers.MachineDeletionRemediation target machine was not deleted yet {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "machine status.phase": "Deleting"} 2025-11-16T19:56:13.552383038Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:56:13.552453689Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:56:13.552501111Z INFO controllers.MachineDeletionRemediation Looking for the target Machine {"machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "namespace": "openshift-machine-api"} 2025-11-16T19:56:13.552526265Z INFO controllers.MachineDeletionRemediation target machine found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr"} 2025-11-16T19:56:13.552530933Z INFO controllers.MachineDeletionRemediation target machine was not deleted yet {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "machine status.phase": "Deleting"} 2025-11-16T19:56:43.559946045Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:56:43.560007949Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:56:43.560042318Z INFO controllers.MachineDeletionRemediation Looking for the target Machine {"machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "namespace": "openshift-machine-api"} 2025-11-16T19:56:43.560057978Z INFO controllers.MachineDeletionRemediation target machine found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr"} 2025-11-16T19:56:43.560060424Z INFO controllers.MachineDeletionRemediation target machine was not deleted yet {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "machine": "ocp-edge-cluster-0-6bcbf-worker-0-6qhcr", "machine status.phase": "Deleting"} 2025-11-16T19:57:12.029446702Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.02948046Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:57:12.029485075Z INFO controllers.MachineDeletionRemediation updating Status Condition {"processingConditionStatus": "False", "succededConditionStatus": "False", "reason": "RemediationStoppedByNHC"} 2025-11-16T19:57:12.029490272Z INFO controllers.MachineDeletionRemediation NHC time out annotation found, stopping remediation {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.029660203Z DEBUG events [remediation] NHC added the timed-out annotation, remediation will be stopped {"type": "Normal", "object": {"kind":"MachineDeletionRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee","apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1","resourceVersion":"761771"}, "reason": "RemediationStopped"} 2025-11-16T19:57:12.033674947Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.033707707Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} 2025-11-16T19:57:13.565150026Z INFO controllers.MachineDeletionRemediation reconciling... {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:13.565196619Z INFO controllers.MachineDeletionRemediation Machine Deletion Remediation CR found {"machinedeletionremediation": {"name":"worker-0-0","namespace":"openshift-workload-availability"}, "name": "worker-0-0"} NHC logs: [root@cert-rhosp-02 ~]# oc logs pod/node-healthcheck-controller-manager-79965596bb-7pdwc 2025-11-16T06:16:33.205984714Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-11-16T06:16:33.206112848Z INFO setup Go OS/Arch: linux/amd64 2025-11-16T06:16:33.20611613Z INFO setup Operator Version: 2ef588d 2025-11-16T06:16:33.206117996Z INFO setup Git Commit: 2ef588d65de4087c46447401cd6e757459d79210 2025-11-16T06:16:33.20611971Z INFO setup Build Date: 2025-11-11T13:40:45+00:00 2025-11-16T06:16:33.206121733Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-11-16T06:16:33.225637864Z INFO setup supported control plane topology {"topology": "HighlyAvailable"} 2025-11-16T06:16:33.225673763Z INFO setup Cluster capabilities {"IsOnOpenshift": true, "HasMachineAPI": true} 2025-11-16T06:16:33.225908907Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "remediation.medik8s.io/v1alpha1, Kind=NodeHealthCheck", "path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-11-16T06:16:33.225985908Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-11-16T06:16:33.226033827Z INFO setup starting manager 2025-11-16T06:16:33.226094385Z INFO controller-runtime.metrics Starting metrics server 2025-11-16T06:16:33.226146138Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-11-16T06:16:33.226164248Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2025-11-16T06:16:33.226193503Z INFO controller-runtime.webhook Starting webhook server 2025-11-16T06:16:33.226404986Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-11-16T06:16:33.226463783Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-11-16T06:16:33.226515675Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I1116 06:16:33.327404 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/e1f13584.medik8s.io... I1116 19:46:58.768553 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/e1f13584.medik8s.io 2025-11-16T19:46:58.768812959Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1alpha1.NodeHealthCheck"} 2025-11-16T19:46:58.768811341Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "channel source: 0xc0000de150"} 2025-11-16T19:46:58.768820723Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1.Node"} 2025-11-16T19:46:58.768866672Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.MachineHealthCheck"} 2025-11-16T19:46:58.768863836Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "channel source: 0xc0000de770"} 2025-11-16T19:46:58.768877287Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1.Node"} 2025-11-16T19:46:58.768888888Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.Machine"} 2025-11-16T19:46:58.768734491Z DEBUG events node-healthcheck-controller-manager-79965596bb-7pdwc_e2e84cbb-0a5c-4659-a559-1ce675d6301a became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"e1f13584.medik8s.io","uid":"ddc0b050-aa35-4403-a90f-4cc1fd4c2d28","apiVersion":"coordination.k8s.io/v1","resourceVersion":"757153"}, "reason": "LeaderElection"} I1116 19:46:58.770135 1 shared_informer.go:313] Waiting for caches to sync for feature gate accessor 2025-11-16T19:46:58.869579149Z INFO MHCChecker found termination handler MHC, will ignore Nodes with Terminating condition 2025-11-16T19:46:58.869618959Z INFO MHCChecker MHC Checker status changed, notifying NHC controller 2025-11-16T19:46:58.869708239Z INFO Starting Controller {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck"} 2025-11-16T19:46:58.869728778Z INFO adding all NHCs to reconcile queue for handling MHC event 2025-11-16T19:46:58.869732664Z INFO Starting workers {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "worker count": 1} I1116 19:46:58.870893 1 shared_informer.go:320] Caches are synced for feature gate accessor I1116 19:46:58.870919 1 simple_featuregate_reader.go:171] Starting feature-gate-detector 2025-11-16T19:46:58.871017656Z INFO FeatureGateAccessor FeatureGates initialized I1116 19:46:58.871158 1 recorder_logging.go:49] &Event{ObjectMeta:{dummy.18789465c14ad09d.57258c66 dummy 0 0001-01-01 00:00:00 +0000 UTC map[] map[] [] [] []},InvolvedObject:ObjectReference{Kind:Pod,Namespace:dummy,Name:dummy,UID:,APIVersion:v1,ResourceVersion:,FieldPath:,},Reason:FeatureGatesInitialized,Message:FeatureGates updated to featuregates.Features{Enabled:[]v1.FeatureGateName{"AdditionalRoutingCapabilities", "AdminNetworkPolicy", "AlibabaPlatform", "AzureWorkloadIdentity", "BuildCSIVolumes", "CPMSMachineNamePrefix", "ConsolePluginContentSecurityPolicy", "ExternalOIDC", "ExternalOIDCWithUIDAndExtraClaimMappings", "GatewayAPI", "GatewayAPIController", "HighlyAvailableArbiter", "ImageVolume", "IngressControllerLBSubnetsAWS", "KMSv1", "MachineConfigNodes", "ManagedBootImages", "ManagedBootImagesAWS", "MetricsCollectionProfiles", "NetworkDiagnosticsConfig", "NetworkLiveMigration", "NetworkSegmentation", "NewOLM", "PinnedImages", "ProcMountType", "RouteAdvertisements", "RouteExternalCertificate", "ServiceAccountTokenNodeBinding", "SetEIPForNLBIngressController", "SigstoreImageVerification", "StoragePerformantSecurityPolicy", "UpgradeStatus", "UserNamespacesPodSecurityStandards", "UserNamespacesSupport", "VSphereMultiDisk", "VSphereMultiNetworks"}, Disabled:[]v1.FeatureGateName{"AWSClusterHostedDNS", "AWSClusterHostedDNSInstall", "AWSDedicatedHosts", "AWSServiceLBNetworkSecurityGroup", "AutomatedEtcdBackup", "AzureClusterHostedDNSInstall", "AzureDedicatedHosts", "AzureMultiDisk", "BootImageSkewEnforcement", "BootcNodeManagement", "ClusterAPIInstall", "ClusterAPIInstallIBMCloud", "ClusterMonitoringConfig", "ClusterVersionOperatorConfiguration", "DNSNameResolver", "DualReplica", "DyanmicServiceEndpointIBMCloud", "DynamicResourceAllocation", "EtcdBackendQuota", "EventedPLEG", "Example", "Example2", "ExternalSnapshotMetadata", "GCPClusterHostedDNS", "GCPClusterHostedDNSInstall", "GCPCustomAPIEndpoints", "GCPCustomAPIEndpointsInstall", "ImageModeStatusReporting", "ImageStreamImportMode", "IngressControllerDynamicConfigurationManager", "InsightsConfig", "InsightsConfigAPI", "InsightsOnDemandDataGather", "IrreconcilableMachineConfig", "KMSEncryptionProvider", "MachineAPIMigration", "MachineAPIOperatorDisableMachineHealthCheckController", "ManagedBootImagesAzure", "ManagedBootImagesvSphere", "MaxUnavailableStatefulSet", "MinimumKubeletVersion", "MixedCPUsAllocation", "MultiArchInstallAzure", "MultiDiskSetup", "MutatingAdmissionPolicy", "NewOLMCatalogdAPIV1Metas", "NewOLMOwnSingleNamespace", "NewOLMPreflightPermissionChecks", "NewOLMWebhookProviderOpenshiftServiceCA", "NoRegistryClusterOperations", "NodeSwap", "NutanixMultiSubnets", "OVNObservability", "OpenShiftPodSecurityAdmission", "PreconfiguredUDNAddresses", "SELinuxMount", "ShortCertRotation", "SignatureStores", "SigstoreImageVerificationPKI", "TranslateStreamCloseWebsocketRequests", "VSphereConfigurableMaxAllowedBlockVolumesPerNode", "VSphereHostVMGroupZonal", "VSphereMixedNodeEnv", "VolumeAttributesClass", "VolumeGroupSnapshot"}},Source:EventSource{Component:,Host:,},FirstTimestamp:2025-11-16 19:46:58.871062685 +0000 UTC m=+48625.686235909,LastTimestamp:2025-11-16 19:46:58.871062685 +0000 UTC m=+48625.686235909,Count:1,Type:Normal,EventTime:0001-01-01 00:00:00 +0000 UTC,Series:nil,Action:,Related:nil,ReportingController:,ReportingInstance:,} 2025-11-16T19:46:58.970451985Z INFO Starting Controller {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck"} 2025-11-16T19:46:58.970494053Z INFO Starting workers {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "worker count": 1} 2025-11-16T19:46:58.970569807Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:46:58.97549873Z ERROR controllers.NodeHealthCheck failed to validate template {"NodeHealthCheck name": "nhc-mdr-snr", "error": "failed to get external remediation template openshift-workload-availability/mdr-template: machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorVerbose": "machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"\nfailed to get external remediation template openshift-workload-availability/mdr-template\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplateWithFallbackNamespace\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:98\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplate\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:80\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ValidateTemplates\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:128\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:200\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:201 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255 2025-11-16T19:46:58.975887214Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":0,"healthyNodes":0,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-11-16T11:10:35Z"}, "patch": "{\"status\":{\"healthyNodes\":0,\"observedNodes\":0}}"} 2025-11-16T19:46:59.182201899Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": "failed to get external remediation template openshift-workload-availability/mdr-template: machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorCauses": [{"error": "failed to get external remediation template openshift-workload-availability/mdr-template: machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorVerbose": "machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"\nfailed to get external remediation template openshift-workload-availability/mdr-template\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplateWithFallbackNamespace\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:98\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplate\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:80\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ValidateTemplates\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:128\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:200\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"}], "requeue": false, "requeuAfter": "0s"} 2025-11-16T19:46:59.182299966Z ERROR Reconciler error {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "NodeHealthCheck": {"name":"nhc-mdr-snr"}, "namespace": "", "name": "nhc-mdr-snr", "reconcileID": "76bfe82c-2159-41f0-8ff5-75a522d43a2a", "error": "failed to get external remediation template openshift-workload-availability/mdr-template: machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorCauses": [{"error": "failed to get external remediation template openshift-workload-availability/mdr-template: machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorVerbose": "machinedeletionremediationtemplates.machine-deletion-remediation.medik8s.io \"mdr-template\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"machinedeletionremediationtemplates\" in API group \"machine-deletion-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"\nfailed to get external remediation template openshift-workload-availability/mdr-template\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplateWithFallbackNamespace\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:98\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplate\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:80\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ValidateTemplates\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:128\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:200\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"}]} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:347 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255 2025-11-16T19:46:59.182549097Z INFO console-plugin successfully created / updated console plugin resources 2025-11-16T19:46:59.187730291Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:46:59.196234979Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-11-16T19:46:59.196347708Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "MachineDeletionRemediationTemplate"} 2025-11-16T19:46:59.196375538Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-11-16T19:46:59.196390493Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "MachineDeletionRemediation"} 2025-11-16T19:46:59.196405292Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-11-16T19:46:59.196423587Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "SelfNodeRemediationTemplate"} 2025-11-16T19:46:59.196437066Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-11-16T19:46:59.196448676Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "SelfNodeRemediation"} 2025-11-16T19:46:59.202238683Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} time="2025-11-16T19:46:59Z" level=info msg="invalidating lease" time="2025-11-16T19:46:59Z" level=info msg="getting lease" 2025-11-16T19:46:59.305745331Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:46:59Z" level=info msg="invalidating lease" time="2025-11-16T19:46:59Z" level=info msg="getting lease" 2025-11-16T19:46:59.309813708Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:46:59Z" level=info msg="invalidating lease" time="2025-11-16T19:46:59Z" level=info msg="getting lease" 2025-11-16T19:46:59.313144735Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":3,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-11-16T19:46:58Z"}, "patch": "{\"status\":{\"healthyNodes\":3,\"observedNodes\":3}}"} 2025-11-16T19:46:59.518185428Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-11-16T19:49:01.643120787Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:49:01.643202222Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:49:01.643276296Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:49:01.658494749Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.341507928s"} 2025-11-16T19:49:01.669129967Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:49:01Z" level=info msg="invalidating lease" time="2025-11-16T19:49:01Z" level=info msg="getting lease" 2025-11-16T19:49:01.769632482Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:49:01Z" level=info msg="invalidating lease" time="2025-11-16T19:49:01Z" level=info msg="getting lease" 2025-11-16T19:49:01.869842899Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-11-16T19:46:59Z"}, "patch": "{\"status\":{\"healthyNodes\":2}}"} 2025-11-16T19:49:02.075877044Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "30.341507928s"} 2025-11-16T19:49:32.418104849Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:49:32.424334617Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:49:32.424484303Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-0", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"758377"}, "reason": "DetectedUnhealthy"} 2025-11-16T19:49:32.430017821Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:49:32Z" level=info msg="invalidating lease" time="2025-11-16T19:49:32Z" level=info msg="getting lease" 2025-11-16T19:49:32.43288078Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:49:32Z" level=info msg="invalidating lease" time="2025-11-16T19:49:32Z" level=info msg="getting lease" 2025-11-16T19:49:32.4355756Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:49:32.438577018Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-0"} time="2025-11-16T19:49:32Z" level=info msg="request lease" time="2025-11-16T19:49:32Z" level=info msg="getting lease" time="2025-11-16T19:49:32Z" level=info msg="create lease" 2025-11-16T19:49:32.441244091Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2025-11-16T19:49:32.450611577Z DEBUG events [remediation] Created remediation object for node worker-0-0 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"758377"}, "reason": "RemediationCreated"} 2025-11-16T19:49:32.529625999Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:49:32Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:49:01Z"}, "patch": "{\"spec\":{\"escalatingRemediations\":[{\"order\":-1,\"remediationTemplate\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediationTemplate\",\"name\":\"selfnoderemediationtemplate-sample\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"8m20s\"},{\"order\":0,\"remediationTemplate\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediationTemplate\",\"name\":\"mdr-template\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"1m0s\"}]},\"status\":{\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-vlthx\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"f1cceb3d-2d42-41d9-a619-2986f225e423\"},\"started\":\"2025-11-16T19:49:32Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2025-11-16T19:49:32.658066143Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.658176737Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.658200038Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.658220598Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.662474579Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.662506518Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.66251583Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.662520144Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.697208053Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.697254862Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.697265879Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.697270424Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.706257186Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.70629124Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.706301596Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:49:32.706306113Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:49:32.735291087Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "8m20s"} 2025-11-16T19:49:32.735403117Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:49:32.74162329Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:49:32.74761843Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:49:32Z"} 2025-11-16T19:49:32.747654551Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:49:32Z" level=info msg="invalidating lease" time="2025-11-16T19:49:32Z" level=info msg="getting lease" 2025-11-16T19:49:32.750846742Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:49:32Z" level=info msg="invalidating lease" time="2025-11-16T19:49:32Z" level=info msg="getting lease" 2025-11-16T19:49:32.753678259Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:49:32.756818935Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-vlthx", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:49:32Z" level=info msg="getting lease" 2025-11-16T19:49:32.756867816Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} 2025-11-16T19:49:32.756886596Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:49:32Z"} 2025-11-16T19:49:32.760299516Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:49:32Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:49:32Z"}, "patch": "{\"spec\":{\"escalatingRemediations\":[{\"order\":-1,\"remediationTemplate\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediationTemplate\",\"name\":\"selfnoderemediationtemplate-sample\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"8m20s\"},{\"order\":0,\"remediationTemplate\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediationTemplate\",\"name\":\"mdr-template\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"1m0s\"}]}}"} 2025-11-16T19:49:32.965214211Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "8m19.243116443s"} 2025-11-16T19:51:33.010634001Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:51:33.010715703Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:51:33.010738707Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:51:33.010743903Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:51:33.010773123Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:51:33.017003978Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:51:33.028402358Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:49:32Z"} 2025-11-16T19:51:33.02844373Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:51:33Z" level=info msg="invalidating lease" time="2025-11-16T19:51:33Z" level=info msg="getting lease" 2025-11-16T19:51:33.128445168Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:51:33Z" level=info msg="invalidating lease" time="2025-11-16T19:51:33Z" level=info msg="getting lease" 2025-11-16T19:51:33.229636028Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:51:33.234954426Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-vlthx", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:51:33Z" level=info msg="getting lease" 2025-11-16T19:51:33.235019974Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} time="2025-11-16T19:51:33Z" level=info msg="request lease" time="2025-11-16T19:51:33Z" level=info msg="getting lease" time="2025-11-16T19:51:33Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-mdr-snr setAcquireTime=false" 2025-11-16T19:51:33.237959569Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:49:32Z"} 2025-11-16T19:51:33.332857203Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:49:32Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:49:32Z"}, "patch": "{\"spec\":{\"escalatingRemediations\":[{\"order\":-1,\"remediationTemplate\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediationTemplate\",\"name\":\"selfnoderemediationtemplate-sample\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"8m20s\"},{\"order\":0,\"remediationTemplate\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediationTemplate\",\"name\":\"mdr-template\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"1m0s\"}]}}"} 2025-11-16T19:51:33.538847311Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "6m18.762047715s"} 2025-11-16T19:51:43.17086526Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:51:43.170897785Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:51:43.17091234Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:51:43.170916309Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:51:43.170950649Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:51:43.181708923Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:51:43.187808711Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:51:43Z"} 2025-11-16T19:51:43.18784003Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:51:43Z" level=info msg="invalidating lease" time="2025-11-16T19:51:43Z" level=info msg="getting lease" 2025-11-16T19:51:43.191157552Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:51:43Z" level=info msg="invalidating lease" time="2025-11-16T19:51:43Z" level=info msg="getting lease" 2025-11-16T19:51:43.194172397Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:51:43.197778715Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-vlthx", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:51:43Z" level=info msg="getting lease" 2025-11-16T19:51:43.197847307Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} 2025-11-16T19:51:43.197875696Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:51:43Z"} 2025-11-16T19:51:43.28725857Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:49:32Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:51:33Z"}, "patch": "{\"spec\":{\"escalatingRemediations\":[{\"order\":-1,\"remediationTemplate\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediationTemplate\",\"name\":\"selfnoderemediationtemplate-sample\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"8m20s\"},{\"order\":0,\"remediationTemplate\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediationTemplate\",\"name\":\"mdr-template\",\"namespace\":\"openshift-workload-availability\"},\"timeout\":\"1m0s\"}]}}"} 2025-11-16T19:51:43.493271136Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "6m8.802128349s"} 2025-11-16T19:53:07.964545655Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:53:07.964624906Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:53:07.964672165Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:07.975364324Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "False", "duration left": "29.024637957s"} 2025-11-16T19:53:07.987586462Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:51:43Z"} 2025-11-16T19:53:07.98763471Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:08Z" level=info msg="invalidating lease" time="2025-11-16T19:53:08Z" level=info msg="getting lease" 2025-11-16T19:53:08.088237893Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:08Z" level=info msg="invalidating lease" time="2025-11-16T19:53:08Z" level=info msg="getting lease" 2025-11-16T19:53:08.184972673Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:53:08.18519789Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "30.024637957s"} 2025-11-16T19:53:08.484482573Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:53:08.484560855Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:53:08.484613036Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:08.50501805Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:51:43Z"} 2025-11-16T19:53:08.50508151Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:08Z" level=info msg="invalidating lease" time="2025-11-16T19:53:08Z" level=info msg="getting lease" 2025-11-16T19:53:08.606057654Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:08Z" level=info msg="invalidating lease" time="2025-11-16T19:53:08Z" level=info msg="getting lease" 2025-11-16T19:53:08.610016309Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:53:08.706526174Z INFO controllers.NodeHealthCheck.resource manager setting a delay for node getting healthy {"node name": "worker-0-0", "delay in seconds": 5} 2025-11-16T19:53:08.71728597Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:53:08.717335718Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:53:08.717346876Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:53:08.717352192Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:53:08.717544215Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:49:32Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2025-11-16T19:53:08Z","healthyDelayed":true}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:51:43Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2025-11-16T19:53:08Z\",\"healthyDelayed\":true,\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-vlthx\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"f1cceb3d-2d42-41d9-a619-2986f225e423\"},\"started\":\"2025-11-16T19:49:32Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2025-11-16T19:53:08.924906538Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "6s"} 2025-11-16T19:53:08.925063188Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:08.939736789Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:51:43Z"} 2025-11-16T19:53:08.939768986Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:53:08.944576177Z INFO controllers.NodeHealthCheck.resource manager delaying node getting healthy {"node name": "worker-0-0", "remaining time in seconds": 4.055426858} 2025-11-16T19:53:08.944618064Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:08Z" level=info msg="invalidating lease" time="2025-11-16T19:53:08Z" level=info msg="getting lease" 2025-11-16T19:53:08.948870982Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:08Z" level=info msg="invalidating lease" time="2025-11-16T19:53:08Z" level=info msg="getting lease" 2025-11-16T19:53:08.954083386Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "5.055426858s"} 2025-11-16T19:53:14.00978087Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:14.023105678Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:51:43Z"} 2025-11-16T19:53:14.023196018Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:14Z" level=info msg="invalidating lease" time="2025-11-16T19:53:14Z" level=info msg="getting lease" 2025-11-16T19:53:14.125238583Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:14Z" level=info msg="invalidating lease" time="2025-11-16T19:53:14Z" level=info msg="getting lease" 2025-11-16T19:53:14.130239638Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:53:14.134030252Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-0-0"} 2025-11-16T19:53:14.14180673Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-0-vlthx"} 2025-11-16T19:53:14.142065563Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-11-16T19:53:14.142132087Z DEBUG events [remediation] Deleted remediation CR of kind SelfNodeRemediation with name worker-0-0-vlthx {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"759976"}, "reason": "RemediationRemoved"} 2025-11-16T19:53:14.14252605Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:53:14.142545792Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:53:14.142556025Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:53:14.142563495Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:53:14.142596776Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:14.225114964Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:53:14.229441032Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-0-0"} 2025-11-16T19:53:14.232177334Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:14Z" level=info msg="invalidating lease" time="2025-11-16T19:53:14Z" level=info msg="getting lease" 2025-11-16T19:53:14.235993942Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:14Z" level=info msg="invalidating lease" time="2025-11-16T19:53:14Z" level=info msg="getting lease" 2025-11-16T19:53:14.240132129Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "17s"} 2025-11-16T19:53:15.174903349Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:53:15.174951818Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-vlthx", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:53:15.175011022Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:15.196932293Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} time="2025-11-16T19:53:15Z" level=info msg="invalidating lease" time="2025-11-16T19:53:15Z" level=info msg="getting lease" 2025-11-16T19:53:15.301145856Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:15Z" level=info msg="invalidating lease" time="2025-11-16T19:53:15Z" level=info msg="getting lease" 2025-11-16T19:53:15.397790571Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:15Z" level=info msg="invalidating lease" time="2025-11-16T19:53:15Z" level=info msg="getting lease" 2025-11-16T19:53:15.406912715Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":3,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-11-16T19:53:08Z"}, "patch": "{\"status\":{\"healthyNodes\":3,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"} 2025-11-16T19:53:15.613431177Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-11-16T19:53:25.143125176Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:53:25.155438415Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:53:25Z" level=info msg="invalidating lease" time="2025-11-16T19:53:25Z" level=info msg="getting lease" 2025-11-16T19:53:25.158493524Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:53:25Z" level=info msg="invalidating lease" time="2025-11-16T19:53:25Z" level=info msg="getting lease" 2025-11-16T19:53:25.161614218Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} time="2025-11-16T19:53:25Z" level=info msg="invalidating lease" time="2025-11-16T19:53:25Z" level=info msg="getting lease" 2025-11-16T19:53:25.164978837Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-11-16T19:54:46.43250551Z INFO nodehealthcheck-resource validate update {"name": "nhc-mdr-snr"} 2025-11-16T19:54:46.43546524Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:54:46.448990277Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} time="2025-11-16T19:54:46Z" level=info msg="invalidating lease" time="2025-11-16T19:54:46Z" level=info msg="getting lease" 2025-11-16T19:54:46.452087269Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:54:46Z" level=info msg="invalidating lease" time="2025-11-16T19:54:46Z" level=info msg="getting lease" 2025-11-16T19:54:46.454879837Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:54:46Z" level=info msg="invalidating lease" time="2025-11-16T19:54:46Z" level=info msg="getting lease" 2025-11-16T19:54:46.458227335Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-11-16T19:55:41.983066146Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:55:41.983249501Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-mdr-snr"} 2025-11-16T19:55:41.983327334Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:55:41.997944697Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.002057679s"} 2025-11-16T19:55:42.012138364Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:55:42Z" level=info msg="invalidating lease" time="2025-11-16T19:55:42Z" level=info msg="getting lease" 2025-11-16T19:55:42.112533701Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:55:42Z" level=info msg="invalidating lease" time="2025-11-16T19:55:42Z" level=info msg="getting lease" 2025-11-16T19:55:42.212554967Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-11-16T19:53:15Z"}, "patch": "{\"status\":{\"healthyNodes\":2}}"} 2025-11-16T19:55:42.418003538Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "30.002057679s"} 2025-11-16T19:56:12.420700263Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:56:12.426241466Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:56:12.426370977Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-0", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"761251"}, "reason": "DetectedUnhealthy"} 2025-11-16T19:56:12.432076316Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:56:12Z" level=info msg="invalidating lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.533705204Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:56:12Z" level=info msg="invalidating lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.536835173Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:56:12.540186222Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-0"} time="2025-11-16T19:56:12Z" level=info msg="request lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" time="2025-11-16T19:56:12Z" level=info msg="create lease" 2025-11-16T19:56:12.543433412Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "worker-0-0", "CR kind": "MachineDeletionRemediation", "namespace": "openshift-workload-availability"} 2025-11-16T19:56:12.546629675Z DEBUG events [remediation] Created remediation object for node worker-0-0 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"761251"}, "reason": "RemediationCreated"} 2025-11-16T19:56:12.550366994Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"MachineDeletionRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee","apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:56:12Z","templateName":"mdr-template"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:55:42Z"}, "patch": "{\"status\":{\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediation\",\"name\":\"worker-0-0\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee\"},\"started\":\"2025-11-16T19:56:12Z\",\"templateName\":\"mdr-template\"}]}]}}"} 2025-11-16T19:56:12.551305245Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:56:12.551353263Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:56:12.551363639Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:56:12.551367994Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:56:12.75547135Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "1m0s"} 2025-11-16T19:56:12.75555597Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:56:12.762314789Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:56:12.762345904Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:56:12.762357155Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:56:12.762361043Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:56:12.765012479Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:56:12.770435399Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:56:12.770465072Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:56:12.770472017Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:56:12Z" level=info msg="invalidating lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.773445911Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:56:12Z" level=info msg="invalidating lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.777246247Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:56:12.78200168Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0", "kind": "MachineDeletionRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.782091718Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-11-16T19:56:12.782119595Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:56:12.785985004Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "59.217884731s"} 2025-11-16T19:56:12.786077916Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:56:12.791740188Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:56:12.796863937Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:56:12.796887864Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:56:12.79689544Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:56:12Z" level=info msg="invalidating lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.800546742Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:56:12Z" level=info msg="invalidating lease" time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.804583448Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:56:12.809415099Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0", "kind": "MachineDeletionRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:56:12Z" level=info msg="getting lease" 2025-11-16T19:56:12.809500813Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-11-16T19:56:12.809524908Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:56:12.813188818Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "59.190479657s"} 2025-11-16T19:57:12.004794151Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:57:12.010655236Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:57:12.016024883Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:12.016067082Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:12.016077505Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:57:12Z" level=info msg="invalidating lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.019129616Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:57:12Z" level=info msg="invalidating lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.021850041Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:57:12.024999582Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0", "kind": "MachineDeletionRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.025061348Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-11-16T19:57:12.025080626Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:12.025090145Z INFO controllers.NodeHealthCheck remediation timed out {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:57:12.025094626Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:12.029438717Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.02950162Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:57:12.029522177Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.029535007Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:57:12.033865483Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.033891646Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:57:12.033900749Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.033904967Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0", "Remediation CR Kind": "MachineDeletionRemediation"} 2025-11-16T19:57:12.117593039Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"MachineDeletionRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee","apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:56:12Z","timedOut":"2025-11-16T19:57:12Z","templateName":"mdr-template"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:56:12Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediation\",\"name\":\"worker-0-0\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee\"},\"started\":\"2025-11-16T19:56:12Z\",\"templateName\":\"mdr-template\",\"timedOut\":\"2025-11-16T19:57:12Z\"}]}]}}"} 2025-11-16T19:57:12.324383631Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-11-16T19:57:12.324468934Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:57:12.330111661Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:57:12.335240772Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:12.335263385Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:12.335269875Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:57:12Z" level=info msg="invalidating lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.338407847Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:57:12Z" level=info msg="invalidating lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.341852121Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:57:12.34500348Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-0"} time="2025-11-16T19:57:12Z" level=info msg="request lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" time="2025-11-16T19:57:12Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-mdr-snr setAcquireTime=false" 2025-11-16T19:57:12.347985708Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2025-11-16T19:57:12.35647634Z DEBUG events [remediation] Created remediation object for node worker-0-0 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"761776"}, "reason": "RemediationCreated"} 2025-11-16T19:57:12.361347748Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.361402303Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:12.361416413Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.361423954Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:12.364859439Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.364884605Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:12.364893739Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.364900015Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:12.386405621Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.386441704Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:12.386452836Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:12.386456891Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:12.43681664Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"MachineDeletionRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee","apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:56:12Z","timedOut":"2025-11-16T19:57:12Z","templateName":"mdr-template"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-7vjqc","uid":"f6e1f328-d33d-495c-8a88-621db55bc7e9","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:57:12Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:57:12Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediation\",\"name\":\"worker-0-0\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee\"},\"started\":\"2025-11-16T19:56:12Z\",\"templateName\":\"mdr-template\",\"timedOut\":\"2025-11-16T19:57:12Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-7vjqc\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"f6e1f328-d33d-495c-8a88-621db55bc7e9\"},\"started\":\"2025-11-16T19:57:12Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2025-11-16T19:57:12.642409759Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "8m20s"} 2025-11-16T19:57:12.642493852Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:57:12.648391236Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:57:12.654235468Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:12.65426107Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:12.654270708Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:12.654276691Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:57:12Z" level=info msg="invalidating lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.657017889Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:57:12Z" level=info msg="invalidating lease" time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.660450412Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:57:12.663824208Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7vjqc", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:57:12Z" level=info msg="getting lease" 2025-11-16T19:57:12.663872812Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} 2025-11-16T19:57:12.663890596Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:12.667099435Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "8m19.336112585s"} 2025-11-16T19:57:13.325079647Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:57:13.332338516Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:57:13.33877848Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:13.338805495Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:13.338814326Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:13.338821449Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:57:13Z" level=info msg="invalidating lease" time="2025-11-16T19:57:13Z" level=info msg="getting lease" 2025-11-16T19:57:13.341878665Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:57:13Z" level=info msg="invalidating lease" time="2025-11-16T19:57:13Z" level=info msg="getting lease" 2025-11-16T19:57:13.346257475Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:57:13.394159457Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:13.394193359Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:57:13.394188752Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7vjqc", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2025-11-16T19:57:13.394204107Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:57:13.394218437Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} time="2025-11-16T19:57:13Z" level=info msg="getting lease" 2025-11-16T19:57:13.394275376Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} 2025-11-16T19:57:13.394308059Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:13.446512023Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "8m18.605696989s"} 2025-11-16T19:57:13.446606483Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:57:13.458370327Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:57:13.472661368Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:57:13.472691287Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:13.472701634Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:13.472707178Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:57:13Z" level=info msg="invalidating lease" time="2025-11-16T19:57:13Z" level=info msg="getting lease" 2025-11-16T19:57:13.478417308Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:57:13Z" level=info msg="invalidating lease" time="2025-11-16T19:57:13Z" level=info msg="getting lease" 2025-11-16T19:57:13.486593802Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:57:13.499026518Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7vjqc", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:57:13Z" level=info msg="getting lease" 2025-11-16T19:57:13.499135998Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} 2025-11-16T19:57:13.499168519Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:57:13.511202527Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "8m18.500837462s"} 2025-11-16T19:59:14.010494868Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:59:14.010555737Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:59:14.010570295Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:59:14.010574476Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:59:14.010631571Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:59:14.016435652Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:59:14.027785466Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:59:14.027824133Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:59:14.02783964Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:59:14.027846015Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:59:14Z" level=info msg="invalidating lease" time="2025-11-16T19:59:14Z" level=info msg="getting lease" 2025-11-16T19:59:14.128697476Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:59:14Z" level=info msg="invalidating lease" time="2025-11-16T19:59:14Z" level=info msg="getting lease" 2025-11-16T19:59:14.132494641Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:59:14.136185867Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7vjqc", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:59:14Z" level=info msg="getting lease" 2025-11-16T19:59:14.136243708Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} time="2025-11-16T19:59:14Z" level=info msg="request lease" time="2025-11-16T19:59:14Z" level=info msg="getting lease" time="2025-11-16T19:59:14Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-mdr-snr setAcquireTime=false" 2025-11-16T19:59:14.13911114Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:59:14.229655891Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "6m17.860896038s"} 2025-11-16T19:59:24.06698411Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:59:24.06710921Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:59:24.067148612Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","controller":false}} 2025-11-16T19:59:24.067162313Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-mdr-snr", "Remediation CR Name": "worker-0-0-7vjqc", "Remediation CR Kind": "SelfNodeRemediation"} 2025-11-16T19:59:24.067229059Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T19:59:24.07586457Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T19:59:24.082255679Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T19:59:24.082288516Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T19:59:24.082299207Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:59:24Z"} 2025-11-16T19:59:24.082307145Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T19:59:24Z" level=info msg="invalidating lease" time="2025-11-16T19:59:24Z" level=info msg="getting lease" 2025-11-16T19:59:24.085533311Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T19:59:24Z" level=info msg="invalidating lease" time="2025-11-16T19:59:24Z" level=info msg="getting lease" 2025-11-16T19:59:24.089085253Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T19:59:24.093190322Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7vjqc", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T19:59:24Z" level=info msg="getting lease" 2025-11-16T19:59:24.093252291Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} 2025-11-16T19:59:24.093273746Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:59:24Z"} 2025-11-16T19:59:24.097544986Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "6m7.906730243s"} 2025-11-16T20:05:32.005236078Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T20:05:32.011372544Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T20:05:32.017113828Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T20:05:32.017142482Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T20:05:32.017152674Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:59:24Z"} 2025-11-16T20:05:32.017158576Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T20:05:32Z" level=info msg="invalidating lease" time="2025-11-16T20:05:32Z" level=info msg="getting lease" 2025-11-16T20:05:32.020367505Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T20:05:32Z" level=info msg="invalidating lease" time="2025-11-16T20:05:32Z" level=info msg="getting lease" 2025-11-16T20:05:32.023517557Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T20:05:32.026529092Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7vjqc", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-11-16T20:05:32Z" level=info msg="getting lease" 2025-11-16T20:05:32.026580428Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-mdr-snr", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "8m20s"} time="2025-11-16T20:05:32Z" level=info msg="request lease" time="2025-11-16T20:05:32Z" level=info msg="getting lease" time="2025-11-16T20:05:32Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-mdr-snr setAcquireTime=false" 2025-11-16T20:05:32.029718282Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:59:24Z"} 2025-11-16T20:05:32.029736099Z INFO controllers.NodeHealthCheck remediation timed out {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T20:05:32.029740141Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:59:24Z"} 2025-11-16T20:05:32.02974384Z INFO controllers.NodeHealthCheck skipping timeout annotation on remediation CR: Succeeded condition is True {"NodeHealthCheck name": "nhc-mdr-snr", "CR name": "worker-0-0-7vjqc"} 2025-11-16T20:05:32.11904894Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"MachineDeletionRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee","apiVersion":"machine-deletion-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:56:12Z","timedOut":"2025-11-16T19:57:12Z","templateName":"mdr-template"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-7vjqc","uid":"f6e1f328-d33d-495c-8a88-621db55bc7e9","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-11-16T19:57:12Z","timedOut":"2025-11-16T20:05:32Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-11-16T06:25:20Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-11-16T19:57:12Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"machine-deletion-remediation.medik8s.io/v1alpha1\",\"kind\":\"MachineDeletionRemediation\",\"name\":\"worker-0-0\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"eddd81b0-3ac3-4414-ad83-cd7a57fde0ee\"},\"started\":\"2025-11-16T19:56:12Z\",\"templateName\":\"mdr-template\",\"timedOut\":\"2025-11-16T19:57:12Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-7vjqc\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"f6e1f328-d33d-495c-8a88-621db55bc7e9\"},\"started\":\"2025-11-16T19:57:12Z\",\"templateName\":\"selfnoderemediationtemplate-sample\",\"timedOut\":\"2025-11-16T20:05:32Z\"}]}]}}"} 2025-11-16T20:05:32.324998324Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-11-16T20:05:33.326127601Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-11-16T20:05:33.332687681Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-11-16T20:05:33.338307323Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "PermanentNodeDeletionExpected", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:56:12Z"} 2025-11-16T20:05:33.338331459Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-11-16T19:57:12Z"} 2025-11-16T20:05:33.3383406Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-mdr-snr", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-11-16T19:59:24Z"} 2025-11-16T20:05:33.338345613Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-11-16T20:05:33Z" level=info msg="invalidating lease" time="2025-11-16T20:05:33Z" level=info msg="getting lease" 2025-11-16T20:05:33.341755705Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-11-16T20:05:33Z" level=info msg="invalidating lease" time="2025-11-16T20:05:33Z" level=info msg="getting lease" 2025-11-16T20:05:33.345064412Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} 2025-11-16T20:05:33.345080178Z ERROR controllers.NodeHealthCheck Remediation timed out, and no template left to try {"NodeHealthCheck name": "nhc-mdr-snr", "error": "didn't find a template to use for NHC nhc-mdr-snr and node worker-0-0"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).remediate /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:536 github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:365 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255 2025-11-16T20:05:33.345209106Z DEBUG events [remediation] Remediation timed out, and no template left to try. didn't find a template to use for NHC nhc-mdr-snr and node worker-0-0 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"5cc37621-86e0-4a9a-8751-641e5567058f","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"764528"}, "reason": "NoTemplateLeft"} 2025-11-16T20:05:33.348228161Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "47h51m38.651983473s"} SNR logs: [root@cert-rhosp-02 ~]# oc logs pod/self-node-remediation-controller-manager-77c688fb7b-s8rvc Defaulted container "manager" out of: manager, kube-rbac-proxy 2025-11-16T06:18:45.103251894Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-11-16T06:18:45.103352027Z INFO setup Go OS/Arch: linux/amd64 2025-11-16T06:18:45.103355181Z INFO setup Operator Version: d80706e 2025-11-16T06:18:45.103357084Z INFO setup Git Commit: d80706e1c305b9b65e00dee1b7cb653dcb1705e7 2025-11-16T06:18:45.103358941Z INFO setup Build Date: 2025-11-03T07:49:14+00:00 2025-11-16T06:18:45.103360609Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-11-16T06:18:45.110448051Z INFO utils-taints out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 33} 2025-11-16T06:18:45.110481034Z INFO utils-taints out of service taint strategy {"isGA": true, "k8sMajorVersion": 1, "k8sMinorVersion": 33} 2025-11-16T06:18:45.110487537Z INFO setup Starting as a manager that installs the daemonset 2025-11-16T06:18:45.110495008Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig"} 2025-11-16T06:18:45.1105308Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-11-16T06:18:45.110610335Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-11-16T06:18:45.110654036Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-16T06:18:45.110683302Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-16T06:18:45.110701535Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-16T06:18:45.110722188Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-16T06:18:45.110743552Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation"} 2025-11-16T06:18:45.11076013Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-11-16T06:18:45.110805395Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-11-16T06:18:45.110876162Z INFO setup starting manager 2025-11-16T06:18:45.111046532Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-11-16T06:18:45.111014855Z INFO controller-runtime.metrics Starting metrics server 2025-11-16T06:18:45.111150844Z INFO controller-runtime.webhook Starting webhook server 2025-11-16T06:18:45.111185151Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} I1116 06:18:45.111304 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/547f6cb6.medik8s.io... 2025-11-16T06:18:45.111391444Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-11-16T06:18:45.111464388Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-11-16T06:18:45.11176127Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} 2025-11-16T06:18:49.465550902Z DEBUG controller-runtime.certwatcher certificate event {"event": "CHMOD \"/apiserver.local.config/certificates/apiserver.crt\""} 2025-11-16T06:18:49.465784569Z DEBUG controller-runtime.certwatcher certificate event {"event": "CHMOD \"/apiserver.local.config/certificates/apiserver.key\""} 2025-11-16T06:18:54.883951978Z INFO selfnoderemediationtemplate-resource default {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-16T06:18:54.887819661Z INFO selfnoderemediationtemplate-resource validate create {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-16T06:19:10.976696564Z INFO selfnoderemediationconfig-resource validate create {"name": "self-node-remediation-config"} 2025-11-16T06:19:20.97211863Z INFO selfnoderemediationtemplate-resource default {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-16T06:19:20.975592508Z INFO selfnoderemediationtemplate-resource validate create {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-16T06:25:20.62375939Z INFO selfnoderemediationtemplate-resource validate create {"name": "selfnoderemediationtemplate-sample"} 2025-11-16T11:04:26.647662238Z INFO selfnoderemediation-resource validate create {"name": "worker-0-0-s79k4"} 2025-11-16T11:04:26.856350706Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-s79k4"} 2025-11-16T11:05:26.240001562Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-s79k4"} I1116 19:47:06.304561 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/547f6cb6.medik8s.io 2025-11-16T19:47:06.304860003Z INFO Starting EventSource {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "source": "kind source: *v1alpha1.SelfNodeRemediation"} 2025-11-16T19:47:06.304936292Z INFO Starting Controller {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation"} 2025-11-16T19:47:06.304982973Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1alpha1.SelfNodeRemediationConfig"} 2025-11-16T19:47:06.305018142Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1.DaemonSet"} 2025-11-16T19:47:06.30502617Z INFO Starting Controller {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig"} 2025-11-16T19:47:06.304615605Z DEBUG events self-node-remediation-controller-manager-77c688fb7b-s8rvc_b4f48dec-1130-48ef-9246-a3cd51794fd7 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"547f6cb6.medik8s.io","uid":"f5705f4d-03f2-49b0-9b85-e664cb4492df","apiVersion":"coordination.k8s.io/v1","resourceVersion":"757281"}, "reason": "LeaderElection"} 2025-11-16T19:47:06.406748269Z INFO Starting workers {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "worker count": 1} 2025-11-16T19:47:06.407921642Z INFO Starting workers {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "worker count": 1} 2025-11-16T19:47:06.408177197Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-16T19:47:06.51131469Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-16T19:47:06.511390659Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-16T19:47:06.512648814Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/16 19:47:06 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/11/16 19:47:06 update was successful 2025-11-16T19:47:06.520771508Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-16T19:47:06.520800391Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-16T19:47:06.52080527Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-16T19:47:06.52141297Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/16 19:47:06 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/11/16 19:47:06 update was successful 2025-11-16T19:49:32.447361598Z INFO selfnoderemediation-resource validate create {"name": "worker-0-0-vlthx"} 2025-11-16T19:49:32.55214579Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.552182054Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.552330096Z DEBUG events [remediation] Remediation started by SNR manager {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"758586"}, "reason": "RemediationStarted"} 2025-11-16T19:49:32.655590811Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-vlthx"} 2025-11-16T19:49:32.657769143Z INFO controllers.SelfNodeRemediation finalizer added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.657955052Z DEBUG events [remediation] Remediation process - successful adding finalizer {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"758592"}, "reason": "AddFinalizer"} 2025-11-16T19:49:32.662036332Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.6620483Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.670018091Z INFO controllers.SelfNodeRemediation NoExecute taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-16T19:49:01Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-16T19:49:06Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-16T19:49:32Z"}]} 2025-11-16T19:49:32.670207818Z INFO controllers.SelfNodeRemediation Marking node as unschedulable {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "node name": "worker-0-0"} 2025-11-16T19:49:32.67043265Z DEBUG events [remediation] Remediation process - NoExecute taint added to the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"758595"}, "reason": "AddNoExecute"} 2025-11-16T19:49:32.680589382Z DEBUG events [remediation] Remediation process - unhealthy node marked as unschedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"758598"}, "reason": "MarkUnschedulable"} 2025-11-16T19:49:32.687781833Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.687910183Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.688158428Z INFO controllers.SelfNodeRemediation waiting for unschedulable taint to appear {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "node name": "worker-0-0"} 2025-11-16T19:49:32.697350935Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.697372036Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.697706298Z INFO rebootDurationCalculator No SafeTimeToAssumeNodeRebootedSeconds specified, using calculated minimum safe reboot time {"calculated minimum time in seconds": 120} 2025-11-16T19:49:32.697758096Z INFO controllers.SelfNodeRemediation setting SNR's time to assume node has been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "node name": "worker-0-0", "time": "2025-11-16 19:51:32.697757108 +0000 UTC m=+48767.607810919"} 2025-11-16T19:49:32.69792939Z DEBUG events [remediation] Remediation process - about to update required fencing time on snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"758611"}, "reason": "UpdateTimeAssumedRebooted"} 2025-11-16T19:49:32.706108034Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:32.706125784Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "node name": "worker-0-0", "time left": "2m0.293875162s"} 2025-11-16T19:49:33.688606184Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:49:33.688630227Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "node name": "worker-0-0", "time left": "1m59.311370382s"} 2025-11-16T19:51:33.005180777Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:51:33.005207364Z INFO controllers.SelfNodeRemediation TimeAssumedRebooted is old. The unhealthy node assumed to been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "node name": "worker-0-0"} 2025-11-16T19:51:33.010588942Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:51:33.017410795Z INFO controllers.SelfNodeRemediation out-of-service taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-16T19:49:01Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-16T19:49:06Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-16T19:49:32Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-16T19:49:32Z"},{"key":"node.kubernetes.io/out-of-service","value":"nodeshutdown","effect":"NoExecute","timeAdded":"2025-11-16T19:51:33Z"}]} 2025-11-16T19:51:33.01747796Z DEBUG events [remediation] Remediation process - add out-of-service taint to unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"759299"}, "reason": "AddOutOfService"} 2025-11-16T19:51:33.023695687Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "pod name": "ingress-canary-tmq9k", "phase": "Running"} 2025-11-16T19:51:38.029756158Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:51:38.040640896Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "pod name": "ingress-canary-tmq9k", "phase": "Running"} 2025-11-16T19:51:43.047257462Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:51:43.162293285Z INFO controllers.SelfNodeRemediation out-of-service taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-16T19:49:01Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-16T19:49:06Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-16T19:49:32Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-16T19:49:32Z"}]} 2025-11-16T19:51:43.162350037Z DEBUG events [remediation] Remediation process - remove out-of-service taint from node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"759430"}, "reason": "RemoveOutOfService"} 2025-11-16T19:51:43.162554633Z DEBUG events [remediation] Remediation process - finished deleting unhealthy node resources {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"759430"}, "reason": "DeleteResources"} 2025-11-16T19:51:43.170354941Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:08.713797052Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-vlthx"} 2025-11-16T19:53:08.717175922Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:14.142446258Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:14.142526787Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:14.1488533Z DEBUG events [remediation] Remediation process - mark healthy remediated node as schedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"760058"}, "reason": "MarkNodeSchedulable"} 2025-11-16T19:53:15.157230455Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:15.157252898Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:15.165972178Z INFO controllers.SelfNodeRemediation NoExecute taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}, "new taints": null} 2025-11-16T19:53:15.166180892Z DEBUG events [remediation] Remediation process - remove NoExecute taint from healthy remediated node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"760197"}, "reason": "RemoveNoExecuteTaint"} 2025-11-16T19:53:15.169764047Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-vlthx"} 2025-11-16T19:53:15.174252555Z INFO controllers.SelfNodeRemediation finalizer removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:15.174527275Z DEBUG events [remediation] Remediation process - remove finalizer from snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"760056"}, "reason": "RemoveFinalizer"} 2025-11-16T19:53:15.174594446Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-vlthx","uid":"f1cceb3d-2d42-41d9-a619-2986f225e423","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"760056"}, "reason": "RemediationFinished"} 2025-11-16T19:53:15.177673789Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:53:16.178671115Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-vlthx","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.353485302Z INFO selfnoderemediation-resource validate create {"name": "worker-0-0-7vjqc"} 2025-11-16T19:57:12.356374841Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.35639642Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.356451264Z DEBUG events [remediation] Remediation started by SNR manager {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-7vjqc","uid":"f6e1f328-d33d-495c-8a88-621db55bc7e9","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"761778"}, "reason": "RemediationStarted"} 2025-11-16T19:57:12.359154436Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-7vjqc"} 2025-11-16T19:57:12.361120723Z INFO controllers.SelfNodeRemediation finalizer added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.361254589Z DEBUG events [remediation] Remediation process - successful adding finalizer {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-7vjqc","uid":"f6e1f328-d33d-495c-8a88-621db55bc7e9","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"761781"}, "reason": "AddFinalizer"} 2025-11-16T19:57:12.365070476Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.365083018Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.373274754Z INFO controllers.SelfNodeRemediation NoExecute taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-16T19:55:41Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-16T19:55:47Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-16T19:57:12Z"}]} 2025-11-16T19:57:12.373366055Z INFO controllers.SelfNodeRemediation Marking node as unschedulable {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "node name": "worker-0-0"} 2025-11-16T19:57:12.373452239Z DEBUG events [remediation] Remediation process - NoExecute taint added to the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"761784"}, "reason": "AddNoExecute"} 2025-11-16T19:57:12.380239584Z DEBUG events [remediation] Remediation process - unhealthy node marked as unschedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"761786"}, "reason": "MarkUnschedulable"} 2025-11-16T19:57:12.387224809Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.387243205Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:12.387459188Z INFO controllers.SelfNodeRemediation waiting for unschedulable taint to appear {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "node name": "worker-0-0"} 2025-11-16T19:57:13.387677279Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:13.387708034Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:13.388067257Z INFO rebootDurationCalculator No SafeTimeToAssumeNodeRebootedSeconds specified, using calculated minimum safe reboot time {"calculated minimum time in seconds": 120} 2025-11-16T19:57:13.388104893Z INFO controllers.SelfNodeRemediation setting SNR's time to assume node has been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "node name": "worker-0-0", "time": "2025-11-16 19:59:13.388104159 +0000 UTC m=+49228.298157969"} 2025-11-16T19:57:13.388266377Z DEBUG events [remediation] Remediation process - about to update required fencing time on snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-0-7vjqc","uid":"f6e1f328-d33d-495c-8a88-621db55bc7e9","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"761791"}, "reason": "UpdateTimeAssumedRebooted"} 2025-11-16T19:57:13.394439006Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:57:13.394458362Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "node name": "worker-0-0", "time left": "2m0.605542632s"} 2025-11-16T19:59:14.005235815Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:59:14.005260421Z INFO controllers.SelfNodeRemediation TimeAssumedRebooted is old. The unhealthy node assumed to been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "node name": "worker-0-0"} 2025-11-16T19:59:14.01047099Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:59:14.017812139Z INFO controllers.SelfNodeRemediation out-of-service taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-16T19:55:41Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-16T19:55:47Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-16T19:57:12Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-16T19:57:12Z"},{"key":"node.kubernetes.io/out-of-service","value":"nodeshutdown","effect":"NoExecute","timeAdded":"2025-11-16T19:59:14Z"}]} 2025-11-16T19:59:14.017941332Z DEBUG events [remediation] Remediation process - add out-of-service taint to unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"762427"}, "reason": "AddOutOfService"} 2025-11-16T19:59:14.023705773Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "pod name": "dns-default-76qwr", "phase": "Running"} 2025-11-16T19:59:19.03095637Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:59:19.040586265Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "pod name": "ingress-canary-8qqbp", "phase": "Running"} 2025-11-16T19:59:24.045845455Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}} 2025-11-16T19:59:24.059662049Z INFO controllers.SelfNodeRemediation out-of-service taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-16T19:55:41Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-16T19:55:47Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-16T19:57:12Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-16T19:57:12Z"}]} 2025-11-16T19:59:24.059729447Z DEBUG events [remediation] Remediation process - remove out-of-service taint from node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"762513"}, "reason": "RemoveOutOfService"} 2025-11-16T19:59:24.059860439Z DEBUG events [remediation] Remediation process - finished deleting unhealthy node resources {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"81d10fd3-71fd-47f7-b1d2-f9cbfde06a54","apiVersion":"v1","resourceVersion":"762513"}, "reason": "DeleteResources"} 2025-11-16T19:59:24.066865238Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7vjqc","namespace":"openshift-workload-availabi