[kni@cert-rhosp-02 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.16.0-0.nightly-2025-09-06-014223 True False 46h Cluster version is 4.16.0-0.nightly-2025-09-06-014223 [kni@cert-rhosp-02 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name | grep fence-agents-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ echo $PODS pod/fence-agents-remediation-controller-manager-5f76bb6467-l4q7c pod/fence-agents-remediation-controller-manager-5f76bb6467-nlm62 [kni@cert-rhosp-02 ~]$ for p in $PODS; do echo "== $p"; oc get "$p" -o json | jq .spec.nodeName; done == pod/fence-agents-remediation-controller-manager-5f76bb6467-l4q7c "worker-0-1" == pod/fence-agents-remediation-controller-manager-5f76bb6467-nlm62 "worker-0-2" [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 46h v1.29.14+c68a663 worker-0-1 Ready worker 46h v1.29.14+c68a663 worker-0-2 Ready worker 46h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc get node worker-0-2 -o json | jq .metadata.annotations | grep baremetalhost "baremetalhost.metal3.io/detached": "test", [kni@cert-rhosp-02 ~]$ oc get secret | grep test-far test-far-shared Opaque 2 34h [kni@cert-rhosp-02 ~]$ vi test.yaml [kni@cert-rhosp-02 ~]$ cat test.yaml --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: name: worker-0-2 namespace: openshift-workload-availability spec: agent: fence_ipmilan retrycount: 5 retryinterval: 10s timeout: 300s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--ip': 192.168.123.1 '--lanplus': '' sharedSecretName: test-far-shared [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-2 -- chroot /host bash -c "uptime -s" Temporary namespace openshift-debug-xc5l4 is created for debugging node... Starting pod/worker-0-2-debug-sfkg4 ... To use host binaries, run `chroot /host` 2025-09-09 10:31:14 Removing debug pod ... Temporary namespace openshift-debug-xc5l4 was removed. [kni@cert-rhosp-02 ~]$ oc get far No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml fenceagentsremediation.fence-agents-remediation.medik8s.io/worker-0-2 created [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 47h v1.29.14+c68a663 worker-0-1 Ready worker 47h v1.29.14+c68a663 worker-0-2 NotReady worker 47h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 47h v1.29.14+c68a663 worker-0-1 Ready worker 47h v1.29.14+c68a663 worker-0-2 Ready worker 47h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc get node worker-0-2 -o json | jq .metadata.annotations | grep baremetalhost "baremetalhost.metal3.io/detached": "test", [kni@cert-rhosp-02 ~]$ oc logs pod/fence-agents-remediation-controller-manager-5f76bb6467-l4q7c 2025-09-09T19:35:02.887298327Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-09T19:35:02.887425619Z INFO setup Go OS/Arch: linux/amd64 2025-09-09T19:35:02.887428468Z INFO setup Operator Version: bd73055e 2025-09-09T19:35:02.887431143Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-09-09T19:35:02.887433402Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-09-09T19:35:02.887469826Z INFO setup HTTP/2 for webhooks disabled 2025-09-09T19:35:02.897150308Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 29} 2025-09-09T19:35:02.897178661Z INFO setup out-of-service taint is supported on this cluster 2025-09-09T19:35:02.897213298Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-09T19:35:02.89724875Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-09T19:35:02.897319536Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-09T19:35:02.89737696Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T19:35:02.897410106Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T19:35:02.89745237Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T19:35:02.897491548Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T19:35:02.89751426Z INFO setup starting manager 2025-09-09T19:35:02.897653542Z INFO controller-runtime.metrics Starting metrics server 2025-09-09T19:35:02.897697926Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-09T19:35:02.897789596Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-09T19:35:02.897772299Z INFO controller-runtime.webhook Starting webhook server I0909 19:35:02.897848 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-09T19:35:02.898090233Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-09T19:35:02.898183437Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-09T19:35:02.89835189Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0909 19:41:32.679395 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-09T19:41:32.679433098Z DEBUG events fence-agents-remediation-controller-manager-5f76bb6467-l4q7c_4ef44b30-b503-458a-ba96-265f2a3ca12e became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"9bcd0029-2dec-4ea9-a827-779095b67b9d","apiVersion":"coordination.k8s.io/v1","resourceVersion":"1149011"}, "reason": "LeaderElection"} 2025-09-09T19:41:32.679665806Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-09-09T19:41:32.679701638Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-09-09T19:41:32.781216242Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} 2025-09-09T19:41:32.781345709Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:41:32.781375405Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:41:32.89032627Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:43:54.312921823Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:43:54.312966916Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:43:54.31301529Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-0"} 2025-09-09T19:43:54.319131535Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": null} 2025-09-09T19:43:54.319220614Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-0"} 2025-09-09T19:43:54.319424655Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-0","uid":"6cad2ea6-5cf6-43bf-ac77-02602be4e92b","apiVersion":"v1","resourceVersion":"1149944"}, "reason": "RemoveRemediationTaint"} 2025-09-09T19:43:54.330151182Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-0"} 2025-09-09T19:43:54.338217341Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-0"} 2025-09-09T19:43:54.338365367Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:43:54.338444548Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:43:54.338482816Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found{"CR Name": "worker-0-0", "CR Namespace": "openshift-workload-availability"} 2025-09-09T19:43:54.338524122Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:43:54.338464501Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-0","uid":"e46791fe-1974-4dec-9cf4-fe9e599ac3d5","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1150036"}, "reason": "RemoveFinalizer"} 2025-09-09T19:44:11.627806229Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:44:11.627860419Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:44:11.63062204Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2"} 2025-09-09T19:44:11.633473925Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-2"} 2025-09-09T19:44:11.633524095Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-09T19:44:11.633522124Z"} 2025-09-09T19:44:11.633547112Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2","uid":"6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1150205"}, "reason": "RemediationStarted"} 2025-09-09T19:44:11.633598197Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2","uid":"6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1150205"}, "reason": "AddFinalizer"} 2025-09-09T19:44:11.639082634Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:44:11.639134669Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:44:11.639144388Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:44:11.644151172Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-09T19:44:11Z"}]} 2025-09-09T19:44:11.644177468Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-2"} 2025-09-09T19:44:11.644204106Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2"} 2025-09-09T19:44:11.644263823Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"dd6cacd3-cb5f-4f66-b1f8-144000228ef0","apiVersion":"v1","resourceVersion":"1150009"}, "reason": "AddRemediationTaint"} 2025-09-09T19:44:11.776138142Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--username"} 2025-09-09T19:44:11.776182518Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--password"} 2025-09-09T19:44:11.776244147Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5", "Parameters": ["--username","--password","--lanplus","--action","--ip","--ipport"]} 2025-09-09T19:44:11.776631472Z INFO executer fence agent start {"uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5", "fence_agent": "fence_ipmilan", "retryCount": 5, "retryInterval": "10s", "timeout": "5m0s"} 2025-09-09T19:44:11.77667392Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2","uid":"6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1150207"}, "reason": "FenceAgentExecuted"} 2025-09-09T19:44:11.788373242Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:44:11.788461472Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:44:11.788474582Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:44:11.788547762Z INFO controllers.FenceAgentsRemediation A Fence Agent is already running {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5"} 2025-09-09T19:44:11.807380445Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:44:16.813791549Z INFO executer command completed {"uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5", "response": "Success: Rebooted\n", "errMessage": "", "err": null} 2025-09-09T19:44:16.813855804Z INFO executer fence agent done {"uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5", "fence_agent": "fence_ipmilan", "stdout": "Success: Rebooted\n", "stderr": "", "err": null} 2025-09-09T19:44:16.813861757Z INFO executer updating status {"FAR uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5"} 2025-09-09T19:44:16.81394107Z INFO executer Updating Status Condition {"processingConditionStatus": "", "fenceAgentActionSucceededConditionStatus": "True", "succeededConditionStatus": "", "reason": "FenceAgentSucceeded", "LastUpdateTime": "2025-09-09T19:44:16.813940123Z"} 2025-09-09T19:44:16.814015869Z DEBUG events [remediation] Fence agent was succeeded {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2","uid":"6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1150207"}, "reason": "FenceAgentSucceeded"} 2025-09-09T19:44:16.820047012Z INFO executer status updated {"FAR uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5"} 2025-09-09T19:44:16.82022044Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:44:16.820267412Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:44:16.820320788Z INFO controllers.FenceAgentsRemediation Remediation strategy is ResourceDeletion which explicitly deletes resources - manually deleting workload {"Node Name": "worker-0-2"} 2025-09-09T19:44:16.82063673Z DEBUG events [remediation] Manually delete pods from the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"dd6cacd3-cb5f-4f66-b1f8-144000228ef0","apiVersion":"v1","resourceVersion":"1150209"}, "reason": "DeleteResources"} 2025-09-09T19:44:16.921821168Z INFO commons-resource starting to delete pods {"node name": "worker-0-2"} 2025-09-09T19:44:19.125138014Z INFO commons-resource done deleting pods {"node name": "worker-0-2"} 2025-09-09T19:44:19.125173726Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "", "succeededConditionStatus": "True", "reason": "RemediationFinishedSuccessfully", "LastUpdateTime": "2025-09-09T19:44:19.125172196Z"} 2025-09-09T19:44:19.1251912Z INFO executer cancelling fence agent routine {"uid": "6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5"} 2025-09-09T19:44:19.125224407Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR has completed to remediate the node {"Node Name": "worker-0-2"} 2025-09-09T19:44:19.125350656Z DEBUG events [remediation] Unhealthy node remediation was completed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"dd6cacd3-cb5f-4f66-b1f8-144000228ef0","apiVersion":"v1","resourceVersion":"1150209"}, "reason": "NodeRemediationCompleted"} 2025-09-09T19:44:19.125386449Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2","uid":"6d7e5b15-94ec-4c93-926d-a6a5d3b7a4c5","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1150717"}, "reason": "RemediationFinished"} 2025-09-09T19:44:19.130893506Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T19:44:19.130964431Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T19:44:19.130972835Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T19:44:19.13706846Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile [kni@cert-rhosp-02 ~]$