[kni@titan45 ~]$ oc project openshift-workload-availability Already on project "openshift-workload-availability" on server "https://api.ocp-edge-cluster-0.qe.lab.redhat.com:6443". [kni@titan45 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.19.0-0.nightly-2025-09-11-184658 True False 10h Cluster version is 4.19.0-0.nightly-2025-09-11-184658 [kni@titan45 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded self-node-remediation.v0.10.2 Self Node Remediation Operator 0.10.2 self-node-remediation.v0.10.1 Succeeded [kni@titan45 ~]$ PODS=$(oc get pods -o name -n openshift-workload-availability | grep fence-agents-remediation-controller-manager) [kni@titan45 ~]$ echo $PODS pod/fence-agents-remediation-controller-manager-7bfcd47b77-k2dzj pod/fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9 [kni@titan45 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-7bfcd47b77-k2dzj "worker-0-1" == pod/fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9 "worker-0-0" [kni@titan45 ~]$ PODS=$(oc get pods -o name -n openshift-workload-availability | grep self-node-remediation-controller-manager) [kni@titan45 ~]$ echo $PODS pod/self-node-remediation-controller-manager-866c766664-mrgqr [kni@titan45 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/self-node-remediation-controller-manager-866c766664-mrgqr "worker-0-1" [kni@titan45 ~]$ [kni@titan45 ~]$ oc get fartemplate -o yaml apiVersion: v1 items: [] kind: List metadata: resourceVersion: "" [kni@titan45 ~]$ oc get nhc -o yaml apiVersion: v1 items: [] kind: List metadata: resourceVersion: "" [kni@titan45 ~]$ oc debug node/worker-0-2 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-qvrxj is created for debugging node... Starting pod/worker-0-2-debug-hzrlt ... To use host binaries, run `chroot /host` Fri Sep 12 20:06:37 UTC 2025 2025-09-12 15:41:03 Removing debug pod ... Temporary namespace openshift-debug-qvrxj was removed. [kni@titan45 ~]$ vi test.yaml [kni@titan45 ~]$ cat test.yaml apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: fenceagentsremediationtemplate-test namespace: openshift-machine-api spec: template: spec: agent: fence_ipmilan retrycount: 1 retryinterval: 1s timeout: 1s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 '--password': password '--username': admin --- apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-machine-api name: selfnoderemediationtemplate-sample spec: template: spec: remediationStrategy: Automatic --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-escalation spec: minHealthy: 30% escalatingRemediations: - remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-machine-api order: -1 timeout: 60s - remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-machine-api order: 0 timeout: 180s selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - type: Ready status: "False" duration: 30s - type: Ready status: Unknown duration: 30s [kni@titan45 ~]$ oc apply -f test.yaml fenceagentsremediationtemplate.fence-agents-remediation.medik8s.io/fenceagentsremediationtemplate-test created selfnoderemediationtemplate.self-node-remediation.medik8s.io/selfnoderemediationtemplate-sample created nodehealthcheck.remediation.medik8s.io/nhc-escalation created [kni@titan45 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 11h v1.32.8 worker-0-1 Ready worker 11h v1.32.8 worker-0-2 Ready worker 11h v1.32.8 [kni@titan45 ~]$ oc get far --all-namespaces No resources found [kni@titan45 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 11h v1.32.8 worker-0-1 Ready worker 11h v1.32.8 worker-0-2 NotReady worker 11h v1.32.8 [kni@titan45 ~]$ oc get far -n openshift-machine-api worker-0-2-rk64n -o yaml apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/nhc-timed-out: "2025-09-12T20:19:33Z" remediation.medik8s.io/node-name: worker-0-2 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-12T20:19:31Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-2- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-2-rk64n namespace: openshift-machine-api ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-escalation uid: 3f92f519-39d3-40f3-9a79-d10f562cbd80 - apiVersion: machine.openshift.io/v1beta1 controller: false kind: Machine name: ocp-edge-cluster-0-5chbx-worker-0-t56hz uid: 21a2d8ef-0de7-4a36-9924-7cf4dce417b9 resourceVersion: "257132" uid: 936c44c9-b0e6-49a2-bb60-1c6d51bacc34 spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" remediationStrategy: ResourceDeletion retrycount: 1 retryinterval: 1s sharedSecretName: fence-agents-credentials-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" --password: password --username: admin timeout: 1s status: conditions: - lastTransitionTime: "2025-09-12T20:19:32Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: Processing - lastTransitionTime: "2025-09-12T20:19:32Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-12T20:19:32Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: Succeeded lastUpdateTime: "2025-09-12T20:19:32Z" [kni@titan45 ~]$ oc get snr -n openshift-machine-api worker-0-2-vgzxt -o yaml apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-2 remediation.medik8s.io/template-name: selfnoderemediationtemplate-sample creationTimestamp: "2025-09-12T20:19:33Z" finalizers: - self-node-remediation.medik8s.io/snr-finalizer generateName: worker-0-2- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-2-vgzxt namespace: openshift-machine-api ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-escalation uid: 3f92f519-39d3-40f3-9a79-d10f562cbd80 - apiVersion: machine.openshift.io/v1beta1 controller: false kind: Machine name: ocp-edge-cluster-0-5chbx-worker-0-t56hz uid: 21a2d8ef-0de7-4a36-9924-7cf4dce417b9 resourceVersion: "257155" uid: c2801905-bb13-4d94-a2f5-c2b1ec6f2710 spec: remediationStrategy: Automatic status: conditions: - lastTransitionTime: "2025-09-12T20:19:33Z" message: "" reason: RemediationStarted status: "True" type: Processing - lastTransitionTime: "2025-09-12T20:19:33Z" message: "" reason: RemediationStarted status: Unknown type: Succeeded phase: Pre-Reboot-Completed timeAssumedRebooted: "2025-09-12T20:21:33Z" [kni@titan45 ~]$ oc get snr --all-namespaces NAMESPACE NAME AGE openshift-machine-api worker-0-2-vgzxt 117s [kni@titan45 ~]$ oc get far --all-namespaces No resources found [kni@titan45 ~]$ oc get snr --all-namespaces No resources found [kni@titan45 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 11h v1.32.8 worker-0-1 Ready worker 11h v1.32.8 worker-0-2 Ready worker 11h v1.32.8 [kni@titan45 ~]$ oc debug node/worker-0-2 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-59v9q is created for debugging node... Starting pod/worker-0-2-debug-gvbwz ... To use host binaries, run `chroot /host` Fri Sep 12 20:23:02 UTC 2025 2025-09-12 20:20:22 Removing debug pod ... Temporary namespace openshift-debug-59v9q was removed. [kni@titan45 ~]$ oc get nodes/worker-0-2 -o json | jq .spec.taints null FAR Logs: 2025-09-12T12:53:08.524544618Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-12T12:53:08.524858082Z INFO setup Go OS/Arch: linux/amd64 2025-09-12T12:53:08.524867645Z INFO setup Operator Version: bd73055e 2025-09-12T12:53:08.524899355Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-09-12T12:53:08.524908832Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-09-12T12:53:08.524964073Z INFO setup HTTP/2 for webhooks disabled 2025-09-12T12:53:08.59661479Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 32} 2025-09-12T12:53:08.596918062Z INFO setup out-of-service taint is supported on this cluster 2025-09-12T12:53:08.59714752Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-12T12:53:08.59731168Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-12T12:53:08.597674534Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-12T12:53:08.597853421Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598187196Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598294914Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598448954Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598594664Z INFO setup starting manager 2025-09-12T12:53:08.599056239Z INFO controller-runtime.metrics Starting metrics server 2025-09-12T12:53:08.599266017Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-12T12:53:08.599524825Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-12T12:53:08.599483554Z INFO controller-runtime.webhook Starting webhook server I0912 12:53:08.599866 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-12T12:53:08.600499824Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-12T12:53:08.600706999Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-12T12:53:08.600948661Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0912 12:53:31.430420 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-12T12:53:31.431268869Z DEBUG events fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9_718e85aa-6178-494b-a22b-4c07a7110f26 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"abd01c01-e4b5-4cf6-92c8-9d02571ac460","apiVersion":"coordination.k8s.io/v1","resourceVersion":"110668"}, "reason": "LeaderElection"} 2025-09-12T12:53:31.432312416Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-09-12T12:53:31.432375753Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-09-12T12:53:31.539140902Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} ... ... ... 2025-09-12T20:21:30.527632887Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T20:21:30.527806627Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T20:21:30.527915047Z INFO controllers.FenceAgentsRemediation Cleaning up a timed-out remediation which is deleted by NHC {"remediation name": "worker-0-2-rk64n"} 2025-09-12T20:21:30.556069758Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-12T20:19:06Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-12T20:19:33Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-09-12T20:19:33Z"}]} 2025-09-12T20:21:30.556422643Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-2"} 2025-09-12T20:21:30.557064517Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257735"}, "reason": "RemoveRemediationTaint"} 2025-09-12T20:21:30.584216603Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2-rk64n"} 2025-09-12T20:21:30.609863441Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-2-rk64n"} 2025-09-12T20:21:30.610265496Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T20:21:30.610453772Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T20:21:30.610598324Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-2-rk64n", "CR Namespace": "openshift-machine-api"} 2025-09-12T20:21:30.610730587Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T20:21:30.610546983Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-2-rk64n","uid":"936c44c9-b0e6-49a2-bb60-1c6d51bacc34","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"257737"}, "reason": "RemoveFinalizer"} SNR Logs: 2025-09-12T10:04:50.007081387Z INFO setup Go Version: go1.23.4 (Red Hat 1.23.4-1.el9) X:strictfipsruntime 2025-09-12T10:04:50.007359858Z INFO setup Go OS/Arch: linux/amd64 2025-09-12T10:04:50.007370868Z INFO setup Operator Version: 66a1641 2025-09-12T10:04:50.007377115Z INFO setup Git Commit: 66a1641f785f4fc398961ebc3a0e71a9cedc0395 2025-09-12T10:04:50.007383542Z INFO setup Build Date: 2025-07-02T15:21:16+00:00 2025-09-12T10:04:50.007389328Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-09-12T10:04:50.008874927Z INFO controller-runtime.metrics Metrics server is starting to listen {"addr": "127.0.0.1:8080"} 2025-09-12T10:04:50.029827042Z INFO utils-taints out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 32} 2025-09-12T10:04:50.029915702Z INFO utils-taints out of service taint strategy {"isGA": true, "k8sMajorVersion": 1, "k8sMinorVersion": 32} 2025-09-12T10:04:50.029936242Z INFO setup Starting as a manager that installs the daemonset 2025-09-12T10:04:50.029959396Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig"} 2025-09-12T10:04:50.03007719Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-09-12T10:04:50.03028857Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-09-12T10:04:50.03044718Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-12T10:04:50.030628475Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-12T10:04:50.030716255Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-12T10:04:50.030840149Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-09-12T10:04:50.030911279Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation"} 2025-09-12T10:04:50.030986807Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-09-12T10:04:50.031097663Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-09-12T10:04:50.031275567Z INFO setup starting manager 2025-09-12T10:04:50.032003864Z INFO controller-runtime.webhook.webhooks Starting webhook server 2025-09-12T10:04:50.032109277Z INFO Starting server {"kind": "health probe", "addr": "[::]:8081"} 2025-09-12T10:04:50.032188317Z INFO starting server {"path": "/metrics", "kind": "metrics", "addr": "127.0.0.1:8080"} I0912 10:04:50.032781 1 leaderelection.go:245] attempting to acquire leader lease openshift-workload-availability/547f6cb6.medik8s.io... 2025-09-12T10:04:50.033177884Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-12T10:04:50.033638752Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-12T10:04:50.033731567Z INFO controller-runtime.certwatcher Starting certificate watcher I0912 10:05:08.056949 1 leaderelection.go:255] successfully acquired lease openshift-workload-availability/547f6cb6.medik8s.io 2025-09-12T10:05:08.060379486Z INFO Starting EventSource {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "source": "kind source: *v1alpha1.SelfNodeRemediation"} 2025-09-12T10:05:08.060472409Z INFO Starting Controller {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation"} 2025-09-12T10:05:08.057895781Z DEBUG events self-node-remediation-controller-manager-866c766664-mrgqr_7d067c82-800c-460c-bb81-cdfc651b0719 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"547f6cb6.medik8s.io","uid":"1aab1816-2a1b-49d5-9938-fcde166284b9","apiVersion":"coordination.k8s.io/v1","resourceVersion":"54787"}, "reason": "LeaderElection"} 2025-09-12T10:05:08.059306209Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1alpha1.SelfNodeRemediationConfig"} 2025-09-12T10:05:08.060877161Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1.DaemonSet"} 2025-09-12T10:05:08.060958316Z INFO Starting Controller {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig"} 2025-09-12T10:05:08.089949649Z INFO selfnoderemediationconfig-resource validate create {"name": "self-node-remediation-config"} 2025-09-12T10:05:08.16761909Z INFO Starting workers {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "worker count": 1} 2025-09-12T10:05:08.17216762Z INFO Starting workers {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "worker count": 1} 2025-09-12T10:05:08.172619574Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-09-12T10:05:08.379703824Z INFO controllers.SelfNodeRemediationConfig Creating new certs 2025-09-12T10:05:12.911172169Z INFO controllers.SelfNodeRemediationConfig Storing certs in new secret 2025-09-12T10:05:12.933562458Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-09-12T10:05:12.936901543Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025-09-12T10:05:12.937000423Z INFO controllers.SelfNodeRemediationConfig snr didn't find old daemonset to be deleted 2025/09/12 10:05:12 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/09/12 10:05:12 does not exist, creating (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/09/12 10:05:12 successfully created (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025-09-12T10:05:12.991631677Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-09-12T10:05:12.991699505Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-09-12T10:05:12.991719141Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-09-12T10:05:12.993397437Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/09/12 10:05:12 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/09/12 10:05:13 update was successful 2025-09-12T10:05:18.06768912Z INFO selfnoderemediationtemplate-resource default {"name": "self-node-remediation-automatic-strategy-template"} 2025-09-12T10:05:18.078233047Z INFO selfnoderemediationtemplate-resource validate create {"name": "self-node-remediation-automatic-strategy-template"} 2025-09-12T20:15:49.834590216Z INFO selfnoderemediationtemplate-resource default {"name": "selfnoderemediationtemplate-sample"} 2025-09-12T20:15:49.849946783Z INFO selfnoderemediationtemplate-resource validate create {"name": "selfnoderemediationtemplate-sample"} 2025-09-12T20:19:33.454901525Z INFO selfnoderemediation-resource validate create {"name": "worker-0-2-vgzxt"} 2025-09-12T20:19:33.568107293Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.568207573Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.568428966Z DEBUG events [remediation] Remediation started by SNR manager {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"257137"}, "reason": "RemediationStarted"} 2025-09-12T20:19:33.781029042Z INFO selfnoderemediation-resource validate update {"name": "worker-0-2-vgzxt"} 2025-09-12T20:19:33.79023159Z INFO controllers.SelfNodeRemediation finalizer added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.790597774Z DEBUG events [remediation] Remediation process - successful adding finalizer {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"257142"}, "reason": "AddFinalizer"} 2025-09-12T20:19:33.807791567Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.807842841Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.843047815Z INFO controllers.SelfNodeRemediation NoExecute taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-12T20:19:00Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-12T20:19:06Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-12T20:19:31Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-12T20:19:33Z"}]} 2025-09-12T20:19:33.843260941Z INFO controllers.SelfNodeRemediation Marking node as unschedulable {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "node name": "worker-0-2"} 2025-09-12T20:19:33.843329931Z DEBUG events [remediation] Remediation process - NoExecute taint added to the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257145"}, "reason": "AddNoExecute"} 2025-09-12T20:19:33.913177509Z DEBUG events [remediation] Remediation process - unhealthy node marked as unschedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257149"}, "reason": "MarkUnschedulable"} 2025-09-12T20:19:33.971690221Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.971755594Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:33.972734134Z INFO rebootDurationCalculator No SafeTimeToAssumeNodeRebootedSeconds specified, using calculated minimum safe reboot time {"calculated minimum time in seconds": 120} 2025-09-12T20:19:33.972814634Z INFO controllers.SelfNodeRemediation setting SNR's time to assume node has been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "node name": "worker-0-2", "time": "2025-09-12 20:21:33.972811914 +0000 UTC m=+37004.064912286"} 2025-09-12T20:19:33.973932766Z DEBUG events [remediation] Remediation process - about to update required fencing time on snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"257151"}, "reason": "UpdateTimeAssumedRebooted"} 2025-09-12T20:19:34.036587466Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:34.036893194Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "node name": "worker-0-2", "time left": "1m59.963112936s"} 2025-09-12T20:19:34.97209454Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:19:34.972211613Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "node name": "worker-0-2", "time left": "1m59.02779224s"} 2025-09-12T20:21:30.565489454Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:30.565688Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "node name": "worker-0-2", "time left": "3.434317066s"} 2025-09-12T20:21:34.0209841Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:34.021032286Z INFO controllers.SelfNodeRemediation TimeAssumedRebooted is old. The unhealthy node assumed to been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "node name": "worker-0-2"} 2025-09-12T20:21:34.034154597Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:34.061936541Z INFO controllers.SelfNodeRemediation out-of-service taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "new taints": [{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-12T20:19:33Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-09-12T20:19:33Z"},{"key":"node.kubernetes.io/out-of-service","value":"nodeshutdown","effect":"NoExecute","timeAdded":"2025-09-12T20:21:34Z"}]} 2025-09-12T20:21:34.06232112Z DEBUG events [remediation] Remediation process - add out-of-service taint to unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257868"}, "reason": "AddOutOfService"} 2025-09-12T20:21:34.197655289Z INFO controllers.SelfNodeRemediation out-of-service taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "new taints": [{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-09-12T20:19:33Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-09-12T20:19:33Z"}]} 2025-09-12T20:21:34.198490756Z DEBUG events [remediation] Remediation process - remove out-of-service taint from node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257877"}, "reason": "RemoveOutOfService"} 2025-09-12T20:21:34.198552234Z DEBUG events [remediation] Remediation process - finished deleting unhealthy node resources {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257877"}, "reason": "DeleteResources"} 2025-09-12T20:21:34.213259996Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:34.213321449Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:34.234900246Z DEBUG events [remediation] Remediation process - mark healthy remediated node as schedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257882"}, "reason": "MarkNodeSchedulable"} 2025-09-12T20:21:35.25590934Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:35.255980288Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:35.291412595Z INFO controllers.SelfNodeRemediation NoExecute taint removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}, "new taints": null} 2025-09-12T20:21:35.292398085Z DEBUG events [remediation] Remediation process - remove NoExecute taint from healthy remediated node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"257979"}, "reason": "RemoveNoExecuteTaint"} 2025-09-12T20:21:35.325278375Z INFO selfnoderemediation-resource validate update {"name": "worker-0-2-vgzxt"} 2025-09-12T20:21:35.352002914Z INFO controllers.SelfNodeRemediation finalizer removed {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:35.353192198Z DEBUG events [remediation] Remediation process - remove finalizer from snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"257879"}, "reason": "RemoveFinalizer"} 2025-09-12T20:21:35.354016035Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"257879"}, "reason": "RemediationFinished"} 2025-09-12T20:21:35.376600949Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} 2025-09-12T20:21:36.376710213Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-vgzxt","namespace":"openshift-machine-api"}} NHC Logs : 2025-09-12T20:15:49.897311957Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:15:49.929748845Z INFO controllers.NodeHealthCheck enabling NHC, valid config, no conflicting MHC configured in the cluster {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:15:49.929989601Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-12T20:15:49.930073933Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "SelfNodeRemediationTemplate"} 2025-09-12T20:15:49.93010681Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-12T20:15:49.930128391Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "SelfNodeRemediation"} 2025-09-12T20:15:49.930105551Z DEBUG events [remediation] No issues found, NodeHealthCheck is enabled. {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"255811"}, "reason": "Enabled"} 2025-09-12T20:15:49.956755573Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:15:49Z" level=info msg="invalidating lease" time="2025-09-12T20:15:49Z" level=info msg="getting lease" 2025-09-12T20:15:49.971961061Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-12T20:15:49Z" level=info msg="invalidating lease" time="2025-09-12T20:15:49Z" level=info msg="getting lease" 2025-09-12T20:15:49.983881884Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:15:49Z" level=info msg="invalidating lease" time="2025-09-12T20:15:49Z" level=info msg="getting lease" 2025-09-12T20:15:49.999687976Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":3,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2025-09-12T20:15:49Z\",\"message\":\"No issues found, NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"}],\"healthyNodes\":3,\"observedNodes\":3,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\"}}"} 2025-09-12T20:15:50.219378721Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-12T20:19:00.774989612Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-escalation"} 2025-09-12T20:19:00.775296113Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-escalation"} 2025-09-12T20:19:00.775488768Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:00.840113809Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.159892168s"} 2025-09-12T20:19:00.867330636Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:00Z" level=info msg="invalidating lease" time="2025-09-12T20:19:00Z" level=info msg="getting lease" 2025-09-12T20:19:00.973640795Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:01Z" level=info msg="invalidating lease" time="2025-09-12T20:19:01Z" level=info msg="getting lease" 2025-09-12T20:19:01.070476357Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-12T20:15:49Z"}, "patch": "{\"status\":{\"healthyNodes\":2}}"} 2025-09-12T20:19:01.298471542Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "30.159892168s"} 2025-09-12T20:19:31.459282Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:31.484559457Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:31.485069719Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-2", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"256834"}, "reason": "DetectedUnhealthy"} 2025-09-12T20:19:31.50763423Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:31Z" level=info msg="invalidating lease" time="2025-09-12T20:19:31Z" level=info msg="getting lease" 2025-09-12T20:19:31.612402906Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:31Z" level=info msg="invalidating lease" time="2025-09-12T20:19:31Z" level=info msg="getting lease" 2025-09-12T20:19:31.626872675Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:31.703351335Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-2"} time="2025-09-12T20:19:31Z" level=info msg="request lease" time="2025-09-12T20:19:31Z" level=info msg="getting lease" time="2025-09-12T20:19:31Z" level=info msg="create lease" 2025-09-12T20:19:31.716856854Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "FenceAgentsRemediation", "namespace": "openshift-machine-api"} 2025-09-12T20:19:31.74645986Z DEBUG events [remediation] Created remediation object for node worker-0-2 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"256834"}, "reason": "RemediationCreated"} 2025-09-12T20:19:31.770920334Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:31.771057154Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:31.771087784Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:31.771102434Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:31.781271518Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-2-rk64n","uid":"936c44c9-b0e6-49a2-bb60-1c6d51bacc34","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-12T20:19:31Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-12T20:19:01Z"}, "patch": "{\"status\":{\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-rk64n\",\"namespace\":\"openshift-machine-api\",\"uid\":\"936c44c9-b0e6-49a2-bb60-1c6d51bacc34\"},\"started\":\"2025-09-12T20:19:31Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-12T20:19:31.792630875Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:31.792741275Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:31.792785231Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:31.792840841Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:32.006140211Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "1m0s"} 2025-09-12T20:19:32.006483995Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:32.033009329Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:32.059728884Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:31Z"} 2025-09-12T20:19:32.060001013Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:32Z" level=info msg="invalidating lease" time="2025-09-12T20:19:32Z" level=info msg="getting lease" 2025-09-12T20:19:32.078944284Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:32Z" level=info msg="invalidating lease" time="2025-09-12T20:19:32Z" level=info msg="getting lease" 2025-09-12T20:19:32.093341749Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:32.110336049Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-rk64n", "kind": "FenceAgentsRemediation", "namespace": "openshift-machine-api"} time="2025-09-12T20:19:32Z" level=info msg="getting lease" 2025-09-12T20:19:32.110920502Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-09-12T20:19:32.111107188Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:31Z"} 2025-09-12T20:19:32.126380757Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "58.888912984s"} 2025-09-12T20:19:32.850503219Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:32.85059509Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:32.850643713Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:32.85067507Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:32.850829785Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:32.873041324Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:32.901422418Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:32.901522159Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:33Z" level=info msg="invalidating lease" time="2025-09-12T20:19:33Z" level=info msg="getting lease" 2025-09-12T20:19:33.00262844Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:33Z" level=info msg="invalidating lease" time="2025-09-12T20:19:33Z" level=info msg="getting lease" 2025-09-12T20:19:33.01954551Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:33.039437091Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-rk64n", "kind": "FenceAgentsRemediation", "namespace": "openshift-machine-api"} time="2025-09-12T20:19:33Z" level=info msg="getting lease" 2025-09-12T20:19:33.039647885Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-09-12T20:19:33.039705965Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:33.039737659Z INFO controllers.NodeHealthCheck remediation failed {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:33.039750956Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:33.065525068Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.065905449Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:33.066024457Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.066103037Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:19:33.103782291Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-2-rk64n","uid":"936c44c9-b0e6-49a2-bb60-1c6d51bacc34","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-12T20:19:31Z","timedOut":"2025-09-12T20:19:33Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-12T20:19:31Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-rk64n\",\"namespace\":\"openshift-machine-api\",\"uid\":\"936c44c9-b0e6-49a2-bb60-1c6d51bacc34\"},\"started\":\"2025-09-12T20:19:31Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-12T20:19:33Z\"}]}]}}"} 2025-09-12T20:19:33.328439008Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-09-12T20:19:33.328852399Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:33.352975542Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:33.377111974Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:33.377271534Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:33Z" level=info msg="invalidating lease" time="2025-09-12T20:19:33Z" level=info msg="getting lease" 2025-09-12T20:19:33.391603862Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:33Z" level=info msg="invalidating lease" time="2025-09-12T20:19:33Z" level=info msg="getting lease" 2025-09-12T20:19:33.405480569Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:33.421125865Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-2"} time="2025-09-12T20:19:33Z" level=info msg="request lease" time="2025-09-12T20:19:33Z" level=info msg="getting lease" time="2025-09-12T20:19:33Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-escalation setAcquireTime=false" 2025-09-12T20:19:33.434387716Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} 2025-09-12T20:19:33.466091389Z DEBUG events [remediation] Created remediation object for node worker-0-2 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"257134"}, "reason": "RemediationCreated"} 2025-09-12T20:19:33.48641897Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-2-rk64n","uid":"936c44c9-b0e6-49a2-bb60-1c6d51bacc34","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-12T20:19:31Z","timedOut":"2025-09-12T20:19:33Z","templateName":"fenceagentsremediationtemplate-test"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-09-12T20:19:33Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-12T20:19:33Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-rk64n\",\"namespace\":\"openshift-machine-api\",\"uid\":\"936c44c9-b0e6-49a2-bb60-1c6d51bacc34\"},\"started\":\"2025-09-12T20:19:31Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-12T20:19:33Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-2-vgzxt\",\"namespace\":\"openshift-machine-api\",\"uid\":\"c2801905-bb13-4d94-a2f5-c2b1ec6f2710\"},\"started\":\"2025-09-12T20:19:33Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2025-09-12T20:19:33.710284118Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "3m0s"} 2025-09-12T20:19:33.789568911Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.789776895Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:19:33.789833812Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.789855149Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:19:33.789938605Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:33.806387377Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.806512384Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:19:33.806594589Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.806625155Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:19:33.820763863Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:33.848373571Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:33.848564668Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-12T20:19:33.955731967Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.956178689Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:19:33.956325879Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:33.9564262Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} time="2025-09-12T20:19:33Z" level=info msg="invalidating lease" time="2025-09-12T20:19:33Z" level=info msg="getting lease" 2025-09-12T20:19:33.970369803Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} 2025-09-12T20:19:34.026693611Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:34.026847011Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:19:34.026888945Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:19:34.026911422Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} time="2025-09-12T20:19:34Z" level=info msg="invalidating lease" time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.078899337Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:34.168589001Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vgzxt", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.168888059Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-12T20:19:34.168970396Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:33Z"} 2025-09-12T20:19:34.192033859Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "2m58.831046884s"} 2025-09-12T20:19:34.192303072Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:34.221197519Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:34.246556779Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:34.246702971Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:33Z"} 2025-09-12T20:19:34.246745974Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:34Z" level=info msg="invalidating lease" time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.266918009Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:34Z" level=info msg="invalidating lease" time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.284481219Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:34.302876675Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vgzxt", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.303116218Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-12T20:19:34.303216658Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:33Z"} 2025-09-12T20:19:34.320561794Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "2m58.696797268s"} 2025-09-12T20:19:34.329677326Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:19:34.354540072Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-12T20:19:34.375143852Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:19:34.375256123Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:33Z"} 2025-09-12T20:19:34.375273517Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:19:34Z" level=info msg="invalidating lease" time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.390414201Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:19:34Z" level=info msg="invalidating lease" time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.411187432Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:19:34.424069874Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vgzxt", "kind": "SelfNodeRemediation", "namespace": "openshift-machine-api"} time="2025-09-12T20:19:34Z" level=info msg="getting lease" 2025-09-12T20:19:34.424301415Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-escalation", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "3m0s"} 2025-09-12T20:19:34.424354235Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:33Z"} 2025-09-12T20:19:34.440664147Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "2m58.575654479s"} 2025-09-12T20:21:30.390032991Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-escalation"} 2025-09-12T20:21:30.390299515Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-escalation"} 2025-09-12T20:21:30.390438035Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:21:30.477521678Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:32Z"} 2025-09-12T20:21:30.477630266Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-escalation", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-12T20:19:33Z"} 2025-09-12T20:21:30.477649823Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:21:30.527368535Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:30.527533445Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:21:30.527564248Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:30.527589078Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-12T20:21:30.528290765Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-2-rk64n"} 2025-09-12T20:21:30.528363025Z DEBUG events [remediation] Deleted remediation CR of kind FenceAgentsRemediation with name worker-0-2-rk64n {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"257139"}, "reason": "RemediationRemoved"} 2025-09-12T20:21:30.562721115Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-2-vgzxt"} 2025-09-12T20:21:30.563638902Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} 2025-09-12T20:21:30.562652569Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:30.564213634Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:30.564377974Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:30.564539195Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:30.562862085Z DEBUG events [remediation] Deleted remediation CR of kind SelfNodeRemediation with name worker-0-2-vgzxt {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"257139"}, "reason": "RemediationRemoved"} time="2025-09-12T20:21:30Z" level=info msg="invalidating lease" time="2025-09-12T20:21:30Z" level=info msg="getting lease" 2025-09-12T20:21:30.587509101Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} 2025-09-12T20:21:30.609233916Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:30.60936008Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-rk64n", "Remediation CR Kind": "FenceAgentsRemediation"} time="2025-09-12T20:21:30Z" level=info msg="invalidating lease" time="2025-09-12T20:21:30Z" level=info msg="getting lease" 2025-09-12T20:21:30.688656265Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-machine-api","name":"worker-0-2-rk64n","uid":"936c44c9-b0e6-49a2-bb60-1c6d51bacc34","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-12T20:19:31Z","timedOut":"2025-09-12T20:19:33Z","templateName":"fenceagentsremediationtemplate-test"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"worker-0-2-vgzxt","uid":"c2801905-bb13-4d94-a2f5-c2b1ec6f2710","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-09-12T20:19:33Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2025-09-12T20:21:30Z"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-12T20:19:33Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2025-09-12T20:21:30Z\",\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-rk64n\",\"namespace\":\"openshift-machine-api\",\"uid\":\"936c44c9-b0e6-49a2-bb60-1c6d51bacc34\"},\"started\":\"2025-09-12T20:19:31Z\",\"templateName\":\"fenceagentsremediationtemplate-test\",\"timedOut\":\"2025-09-12T20:19:33Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-2-vgzxt\",\"namespace\":\"openshift-machine-api\",\"uid\":\"c2801905-bb13-4d94-a2f5-c2b1ec6f2710\"},\"started\":\"2025-09-12T20:19:33Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2025-09-12T20:21:30.913457714Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-12T20:21:30.913649704Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:21:30.958767479Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:21:30Z" level=info msg="invalidating lease" time="2025-09-12T20:21:30Z" level=info msg="getting lease" 2025-09-12T20:21:30.974643936Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:21:31Z" level=info msg="invalidating lease" time="2025-09-12T20:21:31Z" level=info msg="getting lease" 2025-09-12T20:21:31.063231885Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:21:31.174931506Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-12T20:21:34.033406466Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:34.03348719Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:34.033506468Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:34.033515743Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:34.03358117Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:21:34.074826187Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:21:34Z" level=info msg="invalidating lease" time="2025-09-12T20:21:34Z" level=info msg="getting lease" 2025-09-12T20:21:34.175341337Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:21:34Z" level=info msg="invalidating lease" time="2025-09-12T20:21:34Z" level=info msg="getting lease" 2025-09-12T20:21:34.184258187Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:21:34.206683448Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-12T20:21:34.211585986Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:34.211681576Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:34.211710263Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:34.211719743Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:34.211783844Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:21:34.275723388Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:21:34Z" level=info msg="invalidating lease" time="2025-09-12T20:21:34Z" level=info msg="getting lease" 2025-09-12T20:21:34.376332799Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:21:34Z" level=info msg="invalidating lease" time="2025-09-12T20:21:34Z" level=info msg="getting lease" 2025-09-12T20:21:34.4770497Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} 2025-09-12T20:21:34.583957305Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-12T20:21:35.35041793Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-escalation","uid":"3f92f519-39d3-40f3-9a79-d10f562cbd80","controller":false}} 2025-09-12T20:21:35.350532767Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-escalation", "Remediation CR Name": "worker-0-2-vgzxt", "Remediation CR Kind": "SelfNodeRemediation"} 2025-09-12T20:21:35.350685118Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:21:35.460345745Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:21:35Z" level=info msg="invalidating lease" time="2025-09-12T20:21:35Z" level=info msg="getting lease" 2025-09-12T20:21:35.561993469Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:21:35Z" level=info msg="invalidating lease" time="2025-09-12T20:21:35Z" level=info msg="getting lease" 2025-09-12T20:21:35.661549103Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-12T20:21:35Z" level=info msg="invalidating lease" time="2025-09-12T20:21:35Z" level=info msg="getting lease" 2025-09-12T20:21:35.775455307Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-escalation", "new status": {"observedNodes":3,"healthyNodes":3,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-12T20:15:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-12T20:21:30Z"}, "patch": "{\"status\":{\"healthyNodes\":3,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"} 2025-09-12T20:21:35.998808036Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-12T20:21:41.914453284Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-escalation"} 2025-09-12T20:21:41.964516154Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-0"} time="2025-09-12T20:21:42Z" level=info msg="invalidating lease" time="2025-09-12T20:21:42Z" level=info msg="getting lease" 2025-09-12T20:21:42.065590294Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-1"} time="2025-09-12T20:21:42Z" level=info msg="invalidating lease" time="2025-09-12T20:21:42Z" level=info msg="getting lease" 2025-09-12T20:21:42.081373991Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-escalation", "node": "worker-0-2"} time="2025-09-12T20:21:42Z" level=info msg="invalidating lease" time="2025-09-12T20:21:42Z" level=info msg="getting lease" 2025-09-12T20:21:42.095766166Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-escalation", "error": null, "requeue": false, "requeuAfter": "0s"}