[kni@cert-rhosp-02 ~]$ oc project openshift-machine-api Already on project "openshift-machine-api" on server "https://api.ocp-edge-cluster-0.qe.lab.redhat.com:6443". [kni@cert-rhosp-02 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.18.0-0.nightly-2025-09-03-101304 True False 23h Cluster version is 4.18.0-0.nightly-2025-09-03-101304 [kni@cert-rhosp-02 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded self-node-remediation.v0.10.0 Self Node Remediation Operator 0.10.0 self-node-remediation.v0.9.0 Succeeded [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name -n openshift-workload-availability | grep fence-agents-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ echo $PODS pod/fence-agents-remediation-controller-manager-559f48fdc9-6hx58 pod/fence-agents-remediation-controller-manager-559f48fdc9-q2h4j [kni@cert-rhosp-02 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-559f48fdc9-6hx58 "worker-0-1" == pod/fence-agents-remediation-controller-manager-559f48fdc9-q2h4j "worker-0-2" [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name -n openshift-workload-availability | grep self-node-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ echo $PODS pod/self-node-remediation-controller-manager-6948574b69-25d2m [kni@cert-rhosp-02 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/self-node-remediation-controller-manager-6948574b69-25d2m "worker-0-3" [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 NotReady worker 23h v1.31.11 worker-0-1 Ready worker 23h v1.31.11 worker-0-2 Ready worker 23h v1.31.11 worker-0-3 Ready worker 23h v1.31.11 [kni@cert-rhosp-02 ~]$ vi test.yaml [kni@cert-rhosp-02 ~]$ cat test.yaml apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-machine-api name: selfnoderemediationtemplate-sample spec: template: spec: remediationStrategy: Automatic --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: fenceagentsremediationtemplate-test namespace: openshift-machine-api spec: template: spec: agent: fence_ipmilan retrycount: 1 retryinterval: 1s timeout: 1s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' worker-0-3: '6236' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 '--password': password '--username': admin --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-escalation spec: minHealthy: 30% escalatingRemediations: - remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-machine-api order: -1 timeout: 60s - remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-machine-api order: 0 timeout: 180s selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - type: Ready status: "False" duration: 30s - type: Ready status: Unknown duration: 30s [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml selfnoderemediationtemplate.self-node-remediation.medik8s.io/selfnoderemediationtemplate-sample created fenceagentsremediationtemplate.fence-agents-remediation.medik8s.io/fenceagentsremediationtemplate-test created nodehealthcheck.remediation.medik8s.io/nhc-escalation created [kni@cert-rhosp-02 ~]$ oc get far -o yaml apiVersion: v1 items: - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/nhc-timed-out: "2025-09-11T20:50:07Z" remediation.medik8s.io/node-name: worker-0-0 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-11T20:50:05Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-0- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-0-26r5w namespace: openshift-machine-api ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-escalation uid: 62e70205-95ec-4669-b9cc-54397e6a1e88 - apiVersion: machine.openshift.io/v1beta1 controller: false kind: Machine name: ocp-edge-cluster-0-9wrvs-worker-0-6z8jb uid: 768536e7-48ba-4e7c-b174-becd20efd7a2 resourceVersion: "449746" uid: 7e232bbc-e61c-4af3-aebe-728ec615e662 spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" worker-0-3: "6236" remediationStrategy: ResourceDeletion retrycount: 1 retryinterval: 1s sharedSecretName: fence-agents-credentials-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" --password: password --username: admin timeout: 1s status: conditions: - lastTransitionTime: "2025-09-11T20:50:06Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: Processing - lastTransitionTime: "2025-09-11T20:50:06Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-11T20:50:06Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: Succeeded lastUpdateTime: "2025-09-11T20:50:06Z" kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get snr -o yaml apiVersion: v1 items: - apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-0 remediation.medik8s.io/template-name: selfnoderemediationtemplate-sample creationTimestamp: "2025-09-11T20:50:07Z" finalizers: - self-node-remediation.medik8s.io/snr-finalizer generateName: worker-0-0- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-0-7bn6v namespace: openshift-machine-api ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-escalation uid: 62e70205-95ec-4669-b9cc-54397e6a1e88 - apiVersion: machine.openshift.io/v1beta1 controller: false kind: Machine name: ocp-edge-cluster-0-9wrvs-worker-0-6z8jb uid: 768536e7-48ba-4e7c-b174-becd20efd7a2 resourceVersion: "450887" uid: 9c4ccadb-c80f-41a3-af55-3834ac645da7 spec: remediationStrategy: Automatic status: conditions: - lastTransitionTime: "2025-09-11T20:52:29Z" message: "" reason: RemediationFinishedSuccessfully status: "False" type: Processing - lastTransitionTime: "2025-09-11T20:52:29Z" message: "" reason: RemediationFinishedSuccessfully status: "True" type: Succeeded phase: Fencing-Completed timeAssumedRebooted: "2025-09-11T20:52:08Z" kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get snr NAME AGE worker-0-0-7bn6v 4m42s [kni@cert-rhosp-02 ~]$ oc get far NAME AGE worker-0-0-26r5w 4m56s [kni@cert-rhosp-02 ~]$ oc get far No resources found in openshift-machine-api namespace. [kni@cert-rhosp-02 ~]$ oc get snr No resources found in openshift-machine-api namespace. [kni@cert-rhosp-02 ~]$ oc get nodes/worker-0-0 -o json | jq .spec.taints null [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 23h v1.31.11 worker-0-1 Ready worker 23h v1.31.11 worker-0-2 Ready worker 23h v1.31.11 worker-0-3 Ready worker 23h v1.31.11 FAR logs: 2025-09-11T20:20:17.260232074Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-11T20:20:17.2605374Z INFO setup Go OS/Arch: linux/amd64 2025-09-11T20:20:17.260541269Z INFO setup Operator Version: bd73055e 2025-09-11T20:20:17.260543118Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-09-11T20:20:17.260545024Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-09-11T20:20:17.260561005Z INFO setup HTTP/2 for webhooks disabled 2025-09-11T20:20:17.26697108Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 31} 2025-09-11T20:20:17.267018647Z INFO setup out-of-service taint is supported on this cluster 2025-09-11T20:20:17.267070909Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-11T20:20:17.267140758Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-11T20:20:17.267254122Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-11T20:20:17.26731621Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T20:20:17.267385243Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T20:20:17.267436766Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T20:20:17.267486932Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T20:20:17.267516752Z INFO setup starting manager 2025-09-11T20:20:17.267712631Z INFO controller-runtime.metrics Starting metrics server 2025-09-11T20:20:17.267797701Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-11T20:20:17.267865518Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-11T20:20:17.267876312Z INFO controller-runtime.webhook Starting webhook server I0911 20:20:17.268070 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-11T20:20:17.26818775Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-11T20:20:17.268263597Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-11T20:20:17.268325149Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0911 20:20:32.996137 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-11T20:20:32.996163433Z DEBUG events fence-agents-remediation-controller-manager-559f48fdc9-6hx58_3db329ce-dc6d-46f3-adf9-e50907c5b7a2 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"730d14ab-c3d7-4c09-9fa2-a93678a846f9","apiVersion":"coordination.k8s.io/v1","resourceVersion":"437623"}, "reason": "LeaderElection"} 2025-09-11T20:20:32.996344884Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-09-11T20:20:32.996373633Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-09-11T20:20:33.098382217Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} 2025-09-11T20:38:43.543432461Z INFO fenceagentsremediationtemplate-resource default {"name": "fenceagentsremediationtemplate-test"} 2025-09-11T20:50:05.301051575Z INFO fenceagentsremediation-resource validate create {"name": "worker-0-0-26r5w"} 2025-09-11T20:50:05.304479122Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:50:05.30453244Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T20:50:05.410437302Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-0-26r5w"} 2025-09-11T20:50:05.413478826Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-0-26r5w"} 2025-09-11T20:50:05.413523741Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-11T20:50:05.413522316Z"} 2025-09-11T20:50:05.413551933Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"449375"}, "reason": "RemediationStarted"} 2025-09-11T20:50:05.41370696Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"449375"}, "reason": "AddFinalizer"} 2025-09-11T20:50:05.619350404Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:50:05.619458976Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:50:05.619471913Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T20:50:05.62562347Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T20:45:00Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T20:45:05Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:05Z"}]} 2025-09-11T20:50:05.625731461Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-0"} 2025-09-11T20:50:05.625795711Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-0"} 2025-09-11T20:50:05.625883565Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"447788"}, "reason": "AddRemediationTaint"} 2025-09-11T20:50:05.826086641Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-0", "FAR uid": "7e232bbc-e61c-4af3-aebe-728ec615e662", "Parameters": ["--password","--ipport","--username","--action","--ip","--lanplus"]} 2025-09-11T20:50:05.82629203Z INFO executer fence agent start {"uid": "7e232bbc-e61c-4af3-aebe-728ec615e662", "fence_agent": "fence_ipmilan", "retryCount": 1, "retryInterval": "1s", "timeout": "1s"} 2025-09-11T20:50:05.826427856Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"449377"}, "reason": "FenceAgentExecuted"} 2025-09-11T20:50:05.845619167Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:50:05.8457093Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:50:05.845724421Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T20:50:05.845812414Z INFO controllers.FenceAgentsRemediation A Fence Agent is already running {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-0", "FAR uid": "7e232bbc-e61c-4af3-aebe-728ec615e662"} 2025-09-11T20:50:05.855501387Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:50:06.827994157Z INFO executer command failed {"uid": "7e232bbc-e61c-4af3-aebe-728ec615e662", "response": "", "errMessage": "", "err": "signal: killed"} 2025-09-11T20:50:06.82803298Z INFO executer fence agent done {"uid": "7e232bbc-e61c-4af3-aebe-728ec615e662", "fence_agent": "fence_ipmilan", "stdout": "", "stderr": "", "err": "signal: killed"} 2025-09-11T20:50:06.8280471Z INFO executer fence agent context timed out 2025-09-11T20:50:06.828060614Z INFO executer updating status {"FAR uid": "7e232bbc-e61c-4af3-aebe-728ec615e662"} 2025-09-11T20:50:06.828099571Z INFO executer Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "False", "succeededConditionStatus": "False", "reason": "FenceAgentFailed", "LastUpdateTime": "2025-09-11T20:50:06.828098641Z"} 2025-09-11T20:50:06.835042692Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:50:06.8350724Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T20:50:06.83552205Z INFO executer status updated {"FAR uid": "7e232bbc-e61c-4af3-aebe-728ec615e662"} 2025-09-11T20:50:06.842623812Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:50:07.068577857Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-0-26r5w"} 2025-09-11T20:50:07.071502082Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:50:07.071531027Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T20:50:07.071559661Z INFO executer cancelling fence agent routine {"uid": "7e232bbc-e61c-4af3-aebe-728ec615e662"} 2025-09-11T20:50:07.071569116Z INFO controllers.FenceAgentsRemediation Remediation was stopped by the Node Healthcheck Operator 2025-09-11T20:50:07.071591835Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "False", "succeededConditionStatus": "False", "reason": "RemediationInterruptedByNHC", "LastUpdateTime": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:07.071704724Z DEBUG events [remediation] NHC added the timed-out annotation, remediation will be stopped {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"449746"}, "reason": "RemediationStopped"} 2025-09-11T20:50:07.076508044Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:55:08.294313768Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:55:08.294377545Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T20:55:08.294409772Z INFO controllers.FenceAgentsRemediation Cleaning up a timed-out remediation which is deleted by NHC {"remediation name": "worker-0-0-26r5w"} 2025-09-11T20:55:08.302840341Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T20:45:05Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:07Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-09-11T20:50:07Z"}]} 2025-09-11T20:55:08.303107238Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-0"} 2025-09-11T20:55:08.30327528Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"451833"}, "reason": "RemoveRemediationTaint"} 2025-09-11T20:55:08.318539858Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-0-26r5w"} 2025-09-11T20:55:08.318609578Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:55:08.318674387Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T20:55:08.318720481Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-0-26r5w", "CR Namespace": "openshift-machine-api"} 2025-09-11T20:55:08.318726041Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T20:55:08.318738419Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"451840"}, "reason": "RemoveFinalizer"} SNR Logs: 2025-09-11T20:03:34.182706975Z INFO setup Go Version: go1.23.2 (Red Hat 1.23.2-1.el9) X:strictfipsruntime 2025-09-11T20:03:34.182767562Z INFO setup Go OS/Arch: linux/amd64 2025-09-11T20:03:34.182771163Z INFO setup Operator Version: v0.10.0 2025-09-11T20:03:34.182773122Z INFO setup Git Commit: 2025-09-11T20:03:34.182775123Z INFO setup Build Date: 2025-01-13T11:55:12+00:00 2025-09-11T20:03:34.182776981Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-09-11T20:03:34.184094334Z INFO controller-runtime.metrics Metrics server is starting to listen {"addr": "127.0.0.1:8080"} 2025-09-11T20:03:34.189297781Z INFO utils-taints out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 31} 2025-09-11T20:03:34.189315805Z INFO utils-taints out of service taint strategy {"isGA": true, "k8sMajorVersion": 1, "k8sMinorVersion": 31} 2025-09-11T20:03:34.189322682Z INFO setup Starting as a manager that installs the daemonset 2025-09-11T20:03:34.189330907Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig"} 2025-09-11T20:03:34.189369705Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-09-11T20:03:34.189468728Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-09-11T20:03:34.189518172Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-11T20:03:34.189557874Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-11T20:03:34.189603463Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-11T20:03:34.189642857Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-11T20:03:34.189673551Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation"} 2025-09-11T20:03:34.189696403Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-09-11T20:03:34.18974613Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-09-11T20:03:34.189802944Z INFO setup starting manager 2025-09-11T20:03:34.189976441Z INFO controller-runtime.webhook.webhooks Starting webhook server 2025-09-11T20:03:34.189992955Z INFO Starting server {"kind": "health probe", "addr": "[::]:8081"} 2025-09-11T20:03:34.190075791Z INFO starting server {"path": "/metrics", "kind": "metrics", "addr": "127.0.0.1:8080"} I0911 20:03:34.190161 1 leaderelection.go:245] attempting to acquire leader lease openshift-workload-availability/547f6cb6.medik8s.io... 2025-09-11T20:03:34.190167875Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-11T20:03:34.19023268Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-11T20:03:34.190369992Z INFO controller-runtime.certwatcher Starting certificate watcher I0911 20:03:49.278443 1 leaderelection.go:255] successfully acquired lease openshift-workload-availability/547f6cb6.medik8s.io 2025-09-11T20:03:49.278505226Z DEBUG events self-node-remediation-controller-manager-6948574b69-25d2m_e31e36d9-fd98-4f29-88ce-580ba16fd911 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"547f6cb6.medik8s.io","uid":"b118f694-60b1-4e17-af67-34fbe4dd54c0","apiVersion":"coordination.k8s.io/v1","resourceVersion":"431732"}, "reason": "LeaderElection"} 2025-09-11T20:03:49.279031598Z INFO Starting EventSource {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "source": "kind source: *v1alpha1.SelfNodeRemediation"} 2025-09-11T20:03:49.279077885Z INFO Starting Controller {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation"} 2025-09-11T20:03:49.279620378Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1alpha1.SelfNodeRemediationConfig"} 2025-09-11T20:03:49.292174758Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1.DaemonSet"} 2025-09-11T20:03:49.292225063Z INFO Starting Controller {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig"} 2025-09-11T20:03:49.302485113Z INFO selfnoderemediationconfig-resource validate create {"name": "self-node-remediation-config"} 2025-09-11T20:03:49.393789791Z INFO Starting workers {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "worker count": 1} 2025-09-11T20:03:49.396013284Z INFO Starting workers {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "worker count": 1} 2025-09-11T20:03:49.396188231Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-09-11T20:03:49.498452579Z INFO controllers.SelfNodeRemediationConfig Creating new certs 2025-09-11T20:03:52.268699444Z INFO controllers.SelfNodeRemediationConfig Storing certs in new secret 2025-09-11T20:03:52.279020707Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-09-11T20:03:52.27981802Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025-09-11T20:03:52.279857923Z INFO controllers.SelfNodeRemediationConfig snr didn't find old daemonset to be deleted 2025/09/11 20:03:52 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/09/11 20:03:52 does not exist, creating (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/09/11 20:03:52 successfully created (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025-09-11T20:03:52.291076737Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-09-11T20:03:52.291103431Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-09-11T20:03:52.291109271Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-09-11T20:03:52.291647512Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/09/11 20:03:52 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/09/11 20:03:52 update was successful 2025-09-11T20:03:59.283297885Z INFO selfnoderemediationtemplate-resource default {"name": "self-node-remediation-automatic-strategy-template"} 2025-09-11T20:03:59.286921999Z INFO selfnoderemediationtemplate-resource validate create {"name": "self-node-remediation-automatic-strategy-template"} 2025-09-11T20:38:43.522226405Z INFO selfnoderemediationtemplate-resource default {"name": "selfnoderemediationtemplate-sample"} 2025-09-11T20:38:43.525481632Z INFO selfnoderemediationtemplate-resource validate create {"name": "selfnoderemediationtemplate-sample"} 2025-09-11T20:44:51.304261505Z INFO selfnoderemediationtemplate-resource default {"name": "selfnoderemediationtemplate-sample"} 2025-09-11T20:44:51.307554694Z INFO selfnoderemediationtemplate-resource validate create {"name": "selfnoderemediationtemplate-sample"} 2025-09-11T20:50:05.038495264Z INFO selfnoderemediationtemplate-resource default {"name": "selfnoderemediationtemplate-sample"} 2025-09-11T20:50:05.04142133Z INFO selfnoderemediationtemplate-resource validate create {"name": "selfnoderemediationtemplate-sample"} 2025-09-11T20:50:07.398099782Z INFO selfnoderemediation-resource validate create {"name": "worker-0-0-7bn6v"} 2025-09-11T20:50:07.502311028Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.502334277Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.502403214Z DEBUG events [remediation] Remediation started by SNR manager {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"449768"}, "reason": "RemediationStarted"} 2025-09-11T20:50:07.607096553Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-7bn6v"} 2025-09-11T20:50:07.609999962Z INFO controllers.SelfNodeRemediation finalizer added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.610186681Z DEBUG events [remediation] Remediation process - successful adding finalizer {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"449790"}, "reason": "AddFinalizer"} 2025-09-11T20:50:07.615232851Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.615313068Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.623790541Z INFO controllers.SelfNodeRemediation NoExecute taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T20:45:00Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T20:45:05Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:05Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:07Z"}]} 2025-09-11T20:50:07.62394272Z INFO controllers.SelfNodeRemediation Marking node as unschedulable {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "node name": "worker-0-0"} 2025-09-11T20:50:07.624102098Z DEBUG events [remediation] Remediation process - NoExecute taint added to the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"449793"}, "reason": "AddNoExecute"} 2025-09-11T20:50:07.633702552Z DEBUG events [remediation] Remediation process - unhealthy node marked as unschedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"449795"}, "reason": "MarkUnschedulable"} 2025-09-11T20:50:07.640650598Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.640699174Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.641063445Z INFO controllers.SelfNodeRemediation waiting for unschedulable taint to appear {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "node name": "worker-0-0"} 2025-09-11T20:50:07.646972444Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.646986389Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:07.647276907Z INFO controllers.SelfNodeRemediation waiting for unschedulable taint to appear {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "node name": "worker-0-0"} 2025-09-11T20:50:08.641569558Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:08.641587563Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:08.641947829Z INFO rebootDurationCalculator No SafeTimeToAssumeNodeRebootedSeconds specified, using calculated minimum safe reboot time {"calculated minimum time in seconds": 120} 2025-09-11T20:50:08.641960446Z INFO controllers.SelfNodeRemediation setting SNR's time to assume node has been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "node name": "worker-0-0", "time": "2025-09-11 20:52:08.64195994 +0000 UTC m=+2914.472803243"} 2025-09-11T20:50:08.642077986Z DEBUG events [remediation] Remediation process - about to update required fencing time on snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"449798"}, "reason": "UpdateTimeAssumedRebooted"} 2025-09-11T20:50:08.647527542Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:50:08.647546652Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "node name": "worker-0-0", "time left": "2m0.352454756s"} 2025-09-11T20:52:09.005409608Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:52:09.005557716Z INFO controllers.SelfNodeRemediation TimeAssumedRebooted is old. The unhealthy node assumed to been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "node name": "worker-0-0"} 2025-09-11T20:52:09.010220971Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:52:09.018630789Z INFO controllers.SelfNodeRemediation out-of-service taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T20:45:00Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T20:45:05Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:05Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:07Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-09-11T20:50:07Z"},{"key":"node.kubernetes.io/out-of-service","value":"nodeshutdown","effect":"NoExecute","timeAdded":"2025-09-11T20:52:09Z"}]} 2025-09-11T20:52:09.01877508Z DEBUG events [remediation] Remediation process - add out-of-service taint to unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"450609"}, "reason": "AddOutOfService"} 2025-09-11T20:52:09.024195681Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "pod name": "image-registry-5579dbf6bd-2bkml", "phase": "Running"} 2025-09-11T20:52:14.031876342Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:52:14.035716691Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "pod name": "monitoring-plugin-54d76fd9fb-vc479", "phase": "Running"} 2025-09-11T20:52:19.040773879Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:52:19.044939744Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "pod name": "router-default-776464646c-886zl", "phase": "Running"} 2025-09-11T20:52:24.050781327Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:52:24.056867602Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "pod name": "networking-console-plugin-5bc4c9d864-rjflt", "phase": "Running"} 2025-09-11T20:52:29.06256174Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:52:29.175471568Z INFO controllers.SelfNodeRemediation out-of-service taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T20:45:00Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T20:45:05Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:05Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-11T20:50:07Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-09-11T20:50:07Z"}]} 2025-09-11T20:52:29.175649756Z DEBUG events [remediation] Remediation process - remove out-of-service taint from node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"450884"}, "reason": "RemoveOutOfService"} 2025-09-11T20:52:29.175740383Z DEBUG events [remediation] Remediation process - finished deleting unhealthy node resources {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"450884"}, "reason": "DeleteResources"} 2025-09-11T20:52:29.188781521Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:08.301411202Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:08.301453628Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:09.321677321Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:09.321699875Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:09.327609599Z DEBUG events [remediation] Remediation process - mark healthy remediated node as schedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"451962"}, "reason": "MarkNodeSchedulable"} 2025-09-11T20:55:10.338211303Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:10.338232661Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:10.34578927Z INFO controllers.SelfNodeRemediation NoExecute taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T20:45:05Z"}]} 2025-09-11T20:55:10.346015035Z DEBUG events [remediation] Remediation process - remove NoExecute taint from healthy remediated node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"d1591acc-5629-406a-aff7-1ed90eb8c307","apiVersion":"v1","resourceVersion":"451980"}, "reason": "RemoveNoExecuteTaint"} 2025-09-11T20:55:10.355142985Z INFO selfnoderemediation-resource validate update {"name": "worker-0-0-7bn6v"} 2025-09-11T20:55:10.361016092Z INFO controllers.SelfNodeRemediation finalizer removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:10.361258794Z DEBUG events [remediation] Remediation process - remove finalizer from snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"451843"}, "reason": "RemoveFinalizer"} 2025-09-11T20:55:10.361317873Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"451843"}, "reason": "RemediationFinished"} 2025-09-11T20:55:10.366146757Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} 2025-09-11T20:55:11.366637151Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-0-7bn6v","namespace":"openshift-machine-api"}} NHC Logs: 2025-09-11T19:56:11.882384294Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-11T19:56:11.882536871Z INFO setup Go OS/Arch: linux/amd64 2025-09-11T19:56:11.882540321Z INFO setup Operator Version: 2ef588d 2025-09-11T19:56:11.882542422Z INFO setup Git Commit: 2ef588d65de4087c46447401cd6e757459d79210 2025-09-11T19:56:11.882544173Z INFO setup Build Date: 2025-09-01T17:55:11+00:00 2025-09-11T19:56:11.882545807Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-09-11T19:56:11.905322039Z INFO setup supported control plane topology {"topology": "HighlyAvailable"} 2025-09-11T19:56:11.905373194Z INFO setup Cluster capabilities {"IsOnOpenshift": true, "HasMachineAPI": true} 2025-09-11T19:56:11.905680919Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "remediation.medik8s.io/v1alpha1, Kind=NodeHealthCheck", "path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-09-11T19:56:11.905766941Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-09-11T19:56:11.905807733Z INFO setup starting manager 2025-09-11T19:56:11.905864464Z INFO controller-runtime.metrics Starting metrics server 2025-09-11T19:56:11.905945497Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2025-09-11T19:56:11.905996423Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-11T19:56:11.906059531Z INFO controller-runtime.webhook Starting webhook server 2025-09-11T19:56:11.906395657Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-11T19:56:11.906509692Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-11T19:56:11.906546941Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0911 19:56:12.007213 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/e1f13584.medik8s.io... I0911 19:56:28.098736 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/e1f13584.medik8s.io 2025-09-11T19:56:28.098784613Z DEBUG events node-healthcheck-controller-manager-56687f5d99-pj2st_b6014a0e-fcce-41cd-8085-385e4e4be8dc became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"e1f13584.medik8s.io","uid":"b63f2a69-17f5-41a2-8cf1-28717d233797","apiVersion":"coordination.k8s.io/v1","resourceVersion":"428991"}, "reason": "LeaderElection"} 2025-09-11T19:56:28.099406436Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "channel source: 0xc0008060e0"} 2025-09-11T19:56:28.099434562Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1alpha1.NodeHealthCheck"} 2025-09-11T19:56:28.099462659Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1.Node"} 2025-09-11T19:56:28.099510558Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1.Node"} 2025-09-11T19:56:28.099513275Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "channel source: 0xc0008063f0"} 2025-09-11T19:56:28.099536194Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.Machine"} 2025-09-11T19:56:28.099548453Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.MachineHealthCheck"} I0911 19:56:28.100091 1 shared_informer.go:313] Waiting for caches to sync for feature gate accessor 2025-09-11T19:56:28.19931312Z INFO MHCChecker found termination handler MHC, will ignore Nodes with Terminating condition 2025-09-11T19:56:28.199351961Z INFO MHCChecker MHC Checker status changed, notifying NHC controller 2025-09-11T19:56:28.199411707Z INFO adding all NHCs to reconcile queue for handling MHC event I0911 19:56:28.200351 1 shared_informer.go:320] Caches are synced for feature gate accessor I0911 19:56:28.200370 1 simple_featuregate_reader.go:171] Starting feature-gate-detector 2025-09-11T19:56:28.200471742Z INFO Starting Controller {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck"} 2025-09-11T19:56:28.200494539Z INFO Starting workers {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "worker count": 1} 2025-09-11T19:56:28.200478551Z INFO Starting Controller {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck"} 2025-09-11T19:56:28.200537531Z INFO Starting workers {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "worker count": 1} 2025-09-11T19:56:28.200560181Z INFO FeatureGateAccessor FeatureGates initialized I0911 19:56:28.200584 1 recorder_logging.go:49] &Event{ObjectMeta:{dummy.1864529cd99a682c.d1e567b7 dummy 0 0001-01-01 00:00:00 +0000 UTC map[] map[] [] [] []},InvolvedObject:ObjectReference{Kind:Pod,Namespace:dummy,Name:dummy,UID:,APIVersion:v1,ResourceVersion:,FieldPath:,},Reason:FeatureGatesInitialized,Message:FeatureGates updated to featuregates.Features{Enabled:[]v1.FeatureGateName{"AWSEFSDriverVolumeMetrics", "AdminNetworkPolicy", "AlibabaPlatform", "AzureWorkloadIdentity", "BareMetalLoadBalancer", "BuildCSIVolumes", "ChunkSizeMiB", "CloudDualStackNodeIPs", "DisableKubeletCloudCredentialProviders", "GCPLabelsTags", "HardwareSpeed", "IngressControllerLBSubnetsAWS", "KMSv1", "ManagedBootImages", "ManagedBootImagesAWS", "MultiArchInstallAWS", "MultiArchInstallGCP", "NetworkDiagnosticsConfig", "NetworkLiveMigration", "NetworkSegmentation", "NewOLM", "NodeDisruptionPolicy", "OnClusterBuild", "PersistentIPsForVirtualization", "PrivateHostedZoneAWS", "SetEIPForNLBIngressController", "VSphereControlPlaneMachineSet", "VSphereDriverConfiguration", "VSphereMultiVCenters", "VSphereStaticIPs", "ValidatingAdmissionPolicy"}, Disabled:[]v1.FeatureGateName{"AWSClusterHostedDNS", "AdditionalRoutingCapabilities", "AutomatedEtcdBackup", "BootcNodeManagement", "CSIDriverSharedResource", "ClusterAPIInstall", "ClusterAPIInstallIBMCloud", "ClusterMonitoringConfig", "ConsolePluginContentSecurityPolicy", "DNSNameResolver", "DynamicResourceAllocation", "EtcdBackendQuota", "EventedPLEG", "Example", "ExternalOIDC", "ExternalOIDCWithUIDAndExtraClaimMappings", "GCPClusterHostedDNS", "GatewayAPI", "ImageStreamImportMode", "IngressControllerDynamicConfigurationManager", "InsightsConfig", "InsightsConfigAPI", "InsightsOnDemandDataGather", "InsightsRuntimeExtractor", "MachineAPIMigration", "MachineAPIOperatorDisableMachineHealthCheckController", "MachineAPIProviderOpenStack", "MachineConfigNodes", "MaxUnavailableStatefulSet", "MetricsCollectionProfiles", "MinimumKubeletVersion", "MixedCPUsAllocation", "MultiArchInstallAzure", "NodeSwap", "NutanixMultiSubnets", "OVNObservability", "OpenShiftPodSecurityAdmission", "PinnedImages", "PlatformOperators", "ProcMountType", "RouteAdvertisements", "RouteExternalCertificate", "ServiceAccountTokenNodeBinding", "SignatureStores", "SigstoreImageVerification", "TranslateStreamCloseWebsocketRequests", "UpgradeStatus", "UserNamespacesPodSecurityStandards", "UserNamespacesSupport", "VSphereMultiNetworks", "VolumeAttributesClass", "VolumeGroupSnapshot"}},Source:EventSource{Component:,Host:,},FirstTimestamp:2025-09-11 19:56:28.200486956 +0000 UTC m=+16.341779646,LastTimestamp:2025-09-11 19:56:28.200486956 +0000 UTC m=+16.341779646,Count:1,Type:Normal,EventTime:0001-01-01 00:00:00 +0000 UTC,Series:nil,Action:,Related:nil,ReportingController:,ReportingInstance:,} 2025-09-11T19:56:28.512534869Z INFO KubeAPIWarningLogger unknown field "spec.contentSecurityPolicy" 2025-09-11T19:56:28.512649177Z INFO console-plugin successfully created / updated console plugin resources 2025-09-11T20:38:43.566289564Z INFO nodehealthcheck-resource validate create {"name": "nhc-escalation"} 2025-09-11T20:44:51.33348917Z INFO nodehealthcheck-resource validate create {"name": "nhc-escalation"} 2025-09-11T20:44:51.336127243Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:44:51.343700303Z INFO controllers.NodeHealthCheck disabling NHC {"NodeHealthCheck name": "nhc-escalation", "reason": "RemediationTemplateNotFound", "message": "Remediation template not found: \"failed to get external remediation template openshift-operators/self-node-remediation-automatic-strategy-template: selfnoderemediationtemplates.self-node-remediation.medik8s.io \\\"self-node-remediation-automatic-strategy-template\\\" not found\""} 2025-09-11T20:44:51.343866418Z DEBUG events [remediation] Disabling NHC. Reason: RemediationTemplateNotFound, Message: Remediation template not found: "failed to get external remediation template openshift-operators/self-node-remediation-automatic-strategy-template: selfnoderemediationtemplates.self-node-remediation.medik8s.io \"self-node-remediation-automatic-strategy-template\" not found" {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"9f91bdd4-6f3e-4ebb-ab3e-c985fcc023ad","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"447497"}, "reason": "Disabled"} 2025-09-11T20:44:51.344177291Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":0,"healthyNodes":0,"conditions":[{"type":"Disabled","status":"True","lastTransitionTime":"2025-09-11T20:44:51Z","reason":"RemediationTemplateNotFound","message":"Remediation template not found: \"failed to get external remediation template openshift-operators/self-node-remediation-automatic-strategy-template: selfnoderemediationtemplates.self-node-remediation.medik8s.io \\\"self-node-remediation-automatic-strategy-template\\\" not found\""}],"phase":"Disabled","reason":"NHC is disabled: RemediationTemplateNotFound: Remediation template not found: \"failed to get external remediation template openshift-operators/self-node-remediation-automatic-strategy-template: selfnoderemediationtemplates.self-node-remediation.medik8s.io \\\"self-node-remediation-automatic-strategy-template\\\" not found\""}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2025-09-11T20:44:51Z\",\"message\":\"Remediation template not found: \\\"failed to get external remediation template openshift-operators/self-node-remediation-automatic-strategy-template: selfnoderemediationtemplates.self-node-remediation.medik8s.io \\\\\\\"self-node-remediation-automatic-strategy-template\\\\\\\" not found\\\"\",\"reason\":\"RemediationTemplateNotFound\",\"status\":\"True\",\"type\":\"Disabled\"}],\"healthyNodes\":0,\"observedNodes\":0,\"phase\":\"Disabled\",\"reason\":\"NHC is disabled: RemediationTemplateNotFound: Remediation template not found: \\\"failed to get external remediation template openshift-operators/self-node-remediation-automatic-strategy-template: selfnoderemediationtemplates.self-node-remediation.medik8s.io \\\\\\\"self-node-remediation-automatic-strategy-template\\\\\\\" not found\\\"\"}}"} 2025-09-11T20:44:51.549837179Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:45:00.518905617Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-escalation"} 2025-09-11T20:45:00.518988548Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-escalation"} 2025-09-11T20:45:00.519059966Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:45:00.529655992Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:45:06.550718243Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:45:06.556602533Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:45:21.557115043Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:45:21.563751827Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:45:36.564210553Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:45:36.570975956Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:45:51.571991128Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:45:51.578145349Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:46:06.578402675Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:46:06.58405495Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:46:21.58470208Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:46:21.59038652Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:46:36.590803154Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:46:36.596370967Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:46:51.596570046Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:46:51.602599452Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:47:06.603359396Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:47:06.609443568Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:47:21.610385703Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:47:21.615718882Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:47:36.616035328Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:47:36.621670321Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:47:51.62182164Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:47:51.628461231Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:48:06.629614354Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:48:06.635177721Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:48:21.63572059Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:48:21.642519011Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:48:36.643579274Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:48:36.65010492Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:48:51.65063433Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:48:51.656499504Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:49:06.657308271Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:49:06.66154147Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-09-11T20:49:10.266555597Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:49:10.266587089Z INFO controllers.NodeHealthCheck NodeHealthCheck CR not found {"NodeHealthCheck name": "nhc-escalation", "name": "nhc-escalation"} 2025-09-11T20:49:21.662464585Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:49:21.662537294Z INFO controllers.NodeHealthCheck NodeHealthCheck CR not found {"NodeHealthCheck name": "nhc-escalation", "name": "nhc-escalation"} 2025-09-11T20:50:05.064483533Z INFO nodehealthcheck-resource validate create {"name": "nhc-escalation"} 2025-09-11T20:50:05.066899554Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:05.0742845Z INFO controllers.NodeHealthCheck enabling NHC, valid config, no conflicting MHC configured in the cluster {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:05.074345406Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-11T20:50:05.074364827Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "FenceAgentsRemediationTemplate"} 2025-09-11T20:50:05.07437309Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-11T20:50:05.074376896Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "FenceAgentsRemediation"} 2025-09-11T20:50:05.074382688Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-11T20:50:05.074386533Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "SelfNodeRemediationTemplate"} 2025-09-11T20:50:05.07439093Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-11T20:50:05.074394537Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "SelfNodeRemediation"} 2025-09-11T20:50:05.074436313Z DEBUG events [remediation] No issues found, NodeHealthCheck is enabled. {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"449364"}, "reason": "Enabled"} 2025-09-11T20:50:05.074802808Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:05.07494401Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-0", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"449364"}, "reason": "DetectedUnhealthy"} 2025-09-11T20:50:05.080950739Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:05Z" level=info msg="invalidating lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.282475455Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:05Z" level=info msg="invalidating lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.287107934Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:05Z" level=info msg="invalidating lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.290174633Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:05.29387247Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-0"} time="2025-09-11T20:50:05Z" level=info msg="request lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" time="2025-09-11T20:50:05Z" level=info msg="create lease" 2025-09-11T20:50:05.296317434Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "FenceAgentsRemediation", "namespace": "openshift-machine-api"} 2025-09-11T20:50:05.304162216Z DEBUG events [remediation] Created remediation object for node worker-0-0 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"449364"}, "reason": "RemediationCreated"} 2025-09-11T20:50:05.388218945Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:05Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T20:50:05Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2025-09-11T20:50:05Z\",\"message\":\"No issues found, NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"}],\"healthyNodes\":3,\"observedNodes\":4,\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-0-26r5w\",\"namespace\":\"openshift-machine-api\",\"uid\":\"7e232bbc-e61c-4af3-aebe-728ec615e662\"},\"started\":\"2025-09-11T20:50:05Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T20:50:05.413146221Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:05.413215429Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:05.413230096Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:05.413236457Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:05.418485015Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:05.418520228Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:05.41853058Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:05.418535109Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:05.594414277Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "1m0s"} 2025-09-11T20:50:05.594597481Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:05.600961949Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:05.607007237Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:05Z"} 2025-09-11T20:50:05.607032917Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:05Z" level=info msg="invalidating lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.610532261Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:05Z" level=info msg="invalidating lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.614131338Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:05Z" level=info msg="invalidating lease" time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.616983107Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:05.620187184Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-26r5w", "kind": "FenceAgentsRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:50:05Z" level=info msg="getting lease" 2025-09-11T20:50:05.620234049Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-09-11T20:50:05.620254806Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:05Z"} 2025-09-11T20:50:05.623677731Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "59.379749875s"} 2025-09-11T20:50:06.83489334Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:06.834927951Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:06.834939489Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:06.83494311Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:06.834975092Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:06.849357509Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:06.8577753Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:06.857812685Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:06Z" level=info msg="invalidating lease" time="2025-09-11T20:50:06Z" level=info msg="getting lease" 2025-09-11T20:50:06.958110994Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:06Z" level=info msg="invalidating lease" time="2025-09-11T20:50:06Z" level=info msg="getting lease" 2025-09-11T20:50:06.962006139Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.058327214Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:07.063931941Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-26r5w", "kind": "FenceAgentsRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.063994951Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-09-11T20:50:07.064014489Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:07.064026579Z INFO controllers.NodeHealthCheck remediation failed {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:07.064030652Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:07.071683085Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.071735972Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:07.071750444Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.071756661Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:50:07.158927304Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:05Z","timedOut":"2025-09-11T20:50:07Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T20:50:05Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-11T20:50:05Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-0-26r5w\",\"namespace\":\"openshift-machine-api\",\"uid\":\"7e232bbc-e61c-4af3-aebe-728ec615e662\"},\"started\":\"2025-09-11T20:50:05Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-11T20:50:07Z\"}]}]}}"} 2025-09-11T20:50:07.36569898Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-09-11T20:50:07.365800361Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:07.372617748Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:07.378834597Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:07.378866794Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.382244792Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.385183354Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.388300479Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:07.391724029Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-0"} time="2025-09-11T20:50:07Z" level=info msg="request lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" time="2025-09-11T20:50:07Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-escalation setAcquireTime=false" 2025-09-11T20:50:07.394431595Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} 2025-09-11T20:50:07.401485331Z DEBUG events [remediation] Created remediation object for node worker-0-0 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"449753"}, "reason": "RemediationCreated"} 2025-09-11T20:50:07.480746644Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:05Z","timedOut":"2025-09-11T20:50:07Z","templateName":"fenceagentsremediationtemplate-test"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:07Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T20:50:05Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-11T20:50:07Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-0-26r5w\",\"namespace\":\"openshift-machine-api\",\"uid\":\"7e232bbc-e61c-4af3-aebe-728ec615e662\"},\"started\":\"2025-09-11T20:50:05Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-11T20:50:07Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-7bn6v\",\"namespace\":\"openshift-machine-api\",\"uid\":\"9c4ccadb-c80f-41a3-af55-3834ac645da7\"},\"started\":\"2025-09-11T20:50:07Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2025-09-11T20:50:07.610064408Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.610138231Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:07.610152738Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.61015837Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:07.614688729Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.614713414Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:07.614721246Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.61472499Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:07.641020701Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.641064191Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:07.641093362Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:07.641097878Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:07.687117762Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "3m0s"} 2025-09-11T20:50:07.6872039Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:07.692166082Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:07.698739499Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:07.698770826Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:50:07.698777402Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.799114183Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.802721047Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:07Z" level=info msg="invalidating lease" time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.806280143Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:07.80965622Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7bn6v", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:50:07Z" level=info msg="getting lease" 2025-09-11T20:50:07.809710427Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-11T20:50:07.809730825Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:50:07.81310794Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "2m59.1902723s"} 2025-09-11T20:50:08.366572781Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:08.372925837Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:08.378711866Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:08.378734883Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:50:08.378740795Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:08Z" level=info msg="invalidating lease" time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.382505522Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:08Z" level=info msg="invalidating lease" time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.386416446Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:08Z" level=info msg="invalidating lease" time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.389645185Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:08.392994745Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7bn6v", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.393051935Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-11T20:50:08.393078664Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:50:08.396535186Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "2m58.606931255s"} 2025-09-11T20:50:08.647317538Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:08.647352621Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:08.647365215Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:50:08.647369118Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:50:08.647404737Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:50:08.653514035Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:50:08.659574096Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:50:08.659598899Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:50:08.659604145Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:50:08Z" level=info msg="invalidating lease" time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.663511496Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:50:08Z" level=info msg="invalidating lease" time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.667293183Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:50:08Z" level=info msg="invalidating lease" time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.670969282Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:50:08.674355053Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7bn6v", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:50:08Z" level=info msg="getting lease" 2025-09-11T20:50:08.67440588Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-11T20:50:08.674424126Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:50:08.67833873Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "2m58.325579366s"} 2025-09-11T20:52:09.010116763Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:52:09.01018646Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:52:09.010206547Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:52:09.010211204Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:52:09.010240641Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:52:09.015432763Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:52:09.029586916Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:52:09.029623989Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:52:09.029632335Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:52:09Z" level=info msg="invalidating lease" time="2025-09-11T20:52:09Z" level=info msg="getting lease" 2025-09-11T20:52:09.130502563Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:52:09Z" level=info msg="invalidating lease" time="2025-09-11T20:52:09Z" level=info msg="getting lease" 2025-09-11T20:52:09.13421548Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:52:09Z" level=info msg="invalidating lease" time="2025-09-11T20:52:09Z" level=info msg="getting lease" 2025-09-11T20:52:09.137734092Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:52:09.141275425Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7bn6v", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:52:09Z" level=info msg="getting lease" 2025-09-11T20:52:09.141323932Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "3m0s"} time="2025-09-11T20:52:09Z" level=info msg="request lease" time="2025-09-11T20:52:09Z" level=info msg="getting lease" time="2025-09-11T20:52:09Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-escalation setAcquireTime=false" 2025-09-11T20:52:09.144002217Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:07Z"} 2025-09-11T20:52:09.230849554Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "57.856002835s"} 2025-09-11T20:52:29.188570811Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:52:29.188606022Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:52:29.188617633Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:52:29.188621486Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:52:29.188653099Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:52:29.198388129Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:52:29.204741209Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:52:29.204773603Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:52:29.204779446Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:52:29Z" level=info msg="invalidating lease" time="2025-09-11T20:52:29Z" level=info msg="getting lease" 2025-09-11T20:52:29.208013818Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:52:29Z" level=info msg="invalidating lease" time="2025-09-11T20:52:29Z" level=info msg="getting lease" 2025-09-11T20:52:29.211064352Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:52:29Z" level=info msg="invalidating lease" time="2025-09-11T20:52:29Z" level=info msg="getting lease" 2025-09-11T20:52:29.214190433Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:52:29.218069077Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7bn6v", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:52:29Z" level=info msg="getting lease" 2025-09-11T20:52:29.21813144Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-11T20:52:29.218150847Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:52:29.222183425Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "37.781852224s"} 2025-09-11T20:53:07.003900368Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:53:07.009957546Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:53:07.017894652Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:53:07.017922579Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:53:07.017929522Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:53:07Z" level=info msg="invalidating lease" time="2025-09-11T20:53:07Z" level=info msg="getting lease" 2025-09-11T20:53:07.022555385Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:53:07Z" level=info msg="invalidating lease" time="2025-09-11T20:53:07Z" level=info msg="getting lease" 2025-09-11T20:53:07.028713615Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:53:07Z" level=info msg="invalidating lease" time="2025-09-11T20:53:07Z" level=info msg="getting lease" 2025-09-11T20:53:07.032549426Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:53:07.03574509Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-0-7bn6v", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-11T20:53:07Z" level=info msg="getting lease" 2025-09-11T20:53:07.035800833Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-0", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-11T20:53:07.035823354Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:53:07.035834762Z INFO controllers.NodeHealthCheck remediation timed out {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:53:07.035838331Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:53:07.035841816Z INFO controllers.NodeHealthCheck skipping timeout annotation on remediation CR: Succeeded condition is True {"NodeHealthCheck name": "nhc-escalation", "CR name": "worker-0-0-7bn6v"} 2025-09-11T20:53:07.039258117Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:05Z","timedOut":"2025-09-11T20:50:07Z","templateName":"fenceagentsremediationtemplate-test"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:07Z","timedOut":"2025-09-11T20:53:07Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T20:50:05Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-11T20:50:07Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-0-26r5w\",\"namespace\":\"openshift-machine-api\",\"uid\":\"7e232bbc-e61c-4af3-aebe-728ec615e662\"},\"started\":\"2025-09-11T20:50:05Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-11T20:50:07Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-7bn6v\",\"namespace\":\"openshift-machine-api\",\"uid\":\"9c4ccadb-c80f-41a3-af55-3834ac645da7\"},\"started\":\"2025-09-11T20:50:07Z\",\"templateName\":\"selfnoderemediationtemplate-sample\",\"timedOut\":\"2025-09-11T20:53:07Z\"}]}]}}"} 2025-09-11T20:53:07.244660976Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-09-11T20:53:08.245265243Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:53:08.251506409Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T20:53:08.258184667Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:53:08.258207142Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:53:08.258213008Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:53:08Z" level=info msg="invalidating lease" time="2025-09-11T20:53:08Z" level=info msg="getting lease" 2025-09-11T20:53:08.261920963Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:53:08Z" level=info msg="invalidating lease" time="2025-09-11T20:53:08Z" level=info msg="getting lease" 2025-09-11T20:53:08.265315691Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:53:08Z" level=info msg="invalidating lease" time="2025-09-11T20:53:08Z" level=info msg="getting lease" 2025-09-11T20:53:08.268256193Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:53:08.268271316Z ERROR controllers.NodeHealthCheck Remediation timed out, and no template left to try {"NodeHealthCheck name": "nhc-escalation", "error": "didn't find a template to use for NHC nhc-escalation and node worker-0-0"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).remediate /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:536 github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:365 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255 2025-09-11T20:53:08.268373493Z DEBUG events [remediation] Remediation timed out, and no template left to try. didn't find a template to use for NHC nhc-escalation and node worker-0-0 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"451174"}, "reason": "NoTemplateLeft"} 2025-09-11T20:53:08.35954691Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "47h57m56.640643601s"} 2025-09-11T20:55:07.824007612Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-escalation"} 2025-09-11T20:55:07.824126783Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-escalation"} 2025-09-11T20:55:07.824252588Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:55:07.833680368Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-0", "condition type": "Ready", "condition status": "False", "duration left": "29.166322249s"} 2025-09-11T20:55:07.846746196Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:55:07.84678473Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:55:07.84679185Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:55:07Z" level=info msg="invalidating lease" time="2025-09-11T20:55:07Z" level=info msg="getting lease" 2025-09-11T20:55:07.948086534Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:55:07Z" level=info msg="invalidating lease" time="2025-09-11T20:55:07Z" level=info msg="getting lease" 2025-09-11T20:55:07.952277971Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:55:07Z" level=info msg="invalidating lease" time="2025-09-11T20:55:07Z" level=info msg="getting lease" 2025-09-11T20:55:07.955834546Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:55:07.9560483Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "30.166322249s"} 2025-09-11T20:55:08.246781556Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-escalation"} 2025-09-11T20:55:08.246967534Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-0", "NHC": "nhc-escalation"} 2025-09-11T20:55:08.247009094Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:55:08.279446614Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-11T20:50:06Z"} 2025-09-11T20:55:08.279483202Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T20:52:29Z"} 2025-09-11T20:55:08.279489374Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:55:08.293568412Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-0-26r5w"} 2025-09-11T20:55:08.293604164Z DEBUG events [remediation] Deleted remediation CR of kind FenceAgentsRemediation with name worker-0-0-26r5w {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"451174"}, "reason": "RemediationRemoved"} 2025-09-11T20:55:08.294093987Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:55:08.294146956Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:55:08.294164262Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:55:08.294174047Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T20:55:08.301139126Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-0-7bn6v"} 2025-09-11T20:55:08.301165687Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} 2025-09-11T20:55:08.301533757Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:55:08.301577025Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:55:08.301585316Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:55:08.301590797Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:55:08.301615021Z DEBUG events [remediation] Deleted remediation CR of kind SelfNodeRemediation with name worker-0-0-7bn6v {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"451174"}, "reason": "RemediationRemoved"} 2025-09-11T20:55:08.318473243Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:55:08.318509188Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-26r5w", "Remediation CR Kind": "FenceAgentsRemediation"} time="2025-09-11T20:55:08Z" level=info msg="invalidating lease" time="2025-09-11T20:55:08Z" level=info msg="getting lease" 2025-09-11T20:55:08.379614361Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:55:08Z" level=info msg="invalidating lease" time="2025-09-11T20:55:08Z" level=info msg="getting lease" 2025-09-11T20:55:08.38358125Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:55:08Z" level=info msg="invalidating lease" time="2025-09-11T20:55:08Z" level=info msg="getting lease" 2025-09-11T20:55:08.38702014Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-0","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-0-26r5w","uid":"7e232bbc-e61c-4af3-aebe-728ec615e662","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:05Z","timedOut":"2025-09-11T20:50:07Z","templateName":"fenceagentsremediationtemplate-test"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-0-7bn6v","uid":"9c4ccadb-c80f-41a3-af55-3834ac645da7","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T20:50:07Z","timedOut":"2025-09-11T20:53:07Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2025-09-11T20:55:08Z"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T20:50:05Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-11T20:53:07Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2025-09-11T20:55:08Z\",\"name\":\"worker-0-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-0-26r5w\",\"namespace\":\"openshift-machine-api\",\"uid\":\"7e232bbc-e61c-4af3-aebe-728ec615e662\"},\"started\":\"2025-09-11T20:50:05Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-11T20:50:07Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-0-7bn6v\",\"namespace\":\"openshift-machine-api\",\"uid\":\"9c4ccadb-c80f-41a3-af55-3834ac645da7\"},\"started\":\"2025-09-11T20:50:07Z\",\"templateName\":\"selfnoderemediationtemplate-sample\",\"timedOut\":\"2025-09-11T20:53:07Z\"}]}]}}"} 2025-09-11T20:55:08.59312729Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-11T20:55:08.593279168Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:55:08.606676069Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:55:08Z" level=info msg="invalidating lease" time="2025-09-11T20:55:08Z" level=info msg="getting lease" 2025-09-11T20:55:08.610144697Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:55:08Z" level=info msg="invalidating lease" time="2025-09-11T20:55:08Z" level=info msg="getting lease" 2025-09-11T20:55:08.613226013Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-11T20:55:08.618728063Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:55:08Z" level=info msg="invalidating lease" time="2025-09-11T20:55:08Z" level=info msg="getting lease" 2025-09-11T20:55:08.70855523Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-11T20:55:10.360918572Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"62e70205-95ec-4669-b9cc-54397e6a1e88","controller":false}} 2025-09-11T20:55:10.361018933Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-0-7bn6v", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-11T20:55:10.361127188Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:55:10.379405953Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:55:10Z" level=info msg="invalidating lease" time="2025-09-11T20:55:10Z" level=info msg="getting lease" 2025-09-11T20:55:10.383526167Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-11T20:55:10Z" level=info msg="invalidating lease" time="2025-09-11T20:55:10Z" level=info msg="getting lease" 2025-09-11T20:55:10.483060407Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:55:10Z" level=info msg="invalidating lease" time="2025-09-11T20:55:10Z" level=info msg="getting lease" 2025-09-11T20:55:10.579673748Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:55:10Z" level=info msg="invalidating lease" time="2025-09-11T20:55:10Z" level=info msg="getting lease" 2025-09-11T20:55:10.68128348Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":4,"healthyNodes":4,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T20:50:05Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-11T20:55:08Z"}, "patch": "{\"status\":{\"healthyNodes\":4,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"} 2025-09-11T20:55:10.886995968Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-11T20:55:19.594361207Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-11T20:55:19.607559627Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-11T20:55:19Z" level=info msg="invalidating lease" time="2025-09-11T20:55:19Z" level=info msg="getting lease" 2025-09-11T20:55:19.610736623Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-11T20:55:19Z" level=info msg="invalidating lease" time="2025-09-11T20:55:19Z" level=info msg="getting lease" 2025-09-11T20:55:19.613838104Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-11T20:55:19Z" level=info msg="invalidating lease" time="2025-09-11T20:55:19Z" level=info msg="getting lease" 2025-09-11T20:55:19.616387754Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-3"} time="2025-09-11T20:55:19Z" level=info msg="invalidating lease" time="2025-09-11T20:55:19Z" level=info msg="getting lease" 2025-09-11T20:55:19.619057251Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "0s"}