[kni@cert-rhosp-02 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.16.0-0.nightly-2025-09-06-014223 True False 47h Cluster version is 4.16.0-0.nightly-2025-09-06-014223 [kni@cert-rhosp-02 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name | grep fence-agents-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-5f76bb6467-hlp9l "worker-0-0" == pod/fence-agents-remediation-controller-manager-5f76bb6467-mtlp2 "worker-0-2" [kni@cert-rhosp-02 ~]$ oc get fartemplate -o yaml apiVersion: v1 items: [] kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get nhc -o yaml apiVersion: v1 items: [] kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get far No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 47h v1.29.14+c68a663 worker-0-1 Ready worker 47h v1.29.14+c68a663 worker-0-2 Ready worker 47h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-1 -- chroot /host bash -c "uptime -s" Temporary namespace openshift-debug-8vqfk is created for debugging node... Starting pod/worker-0-1-debug-mb26q ... To use host binaries, run `chroot /host` 2025-09-09 10:30:14 Removing debug pod ... Temporary namespace openshift-debug-8v [kni@cert-rhosp-02 ~]$ vi test.yaml [kni@cert-rhosp-02 ~]$ cat test.yaml apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability spec: template: spec: agent: fence_ipmilan retrycount: 1 retryinterval: 1s timeout: 4s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 '--password': password '--username': admin --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-far-worker spec: minHealthy: 30% remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/control-plane operator: DoesNotExist values: [] - key: node-role.kubernetes.io/master operator: DoesNotExist values: [] unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml fenceagentsremediationtemplate.fence-agents-remediation.medik8s.io/fenceagentsremediationtemplate-test created nodehealthcheck.remediation.medik8s.io/nhc-far-worker created [kni@cert-rhosp-02 ~]$ oc get far No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 2d v1.29.14+c68a663 worker-0-1 NotReady worker 2d v1.29.14+c68a663 worker-0-2 Ready worker 2d v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc get far NAME AGE worker-0-1-m6kmh 20s [kni@cert-rhosp-02 ~]$ oc get far -o yaml apiVersion: v1 items: - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-1 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-09T21:09:46Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-1- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-1-m6kmh namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-far-worker uid: 689ab269-33d6-46b8-9d95-d328fb6a5102 resourceVersion: "1187804" uid: 83b80776-21e7-4aea-99e2-1079e8aab092 spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" remediationStrategy: ResourceDeletion retrycount: 1 retryinterval: 1s sharedSecretName: fence-agents-credentials-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" --password: password --username: admin timeout: 1s status: conditions: - lastTransitionTime: "2025-09-09T21:09:47Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: Processing - lastTransitionTime: "2025-09-09T21:09:47Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-09T21:09:47Z" message: Fence agent command has failed reason: FenceAgentFailed status: "False" type: Succeeded lastUpdateTime: "2025-09-09T21:09:47Z" kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get far NAME AGE worker-0-1-m6kmh 85s [kni@cert-rhosp-02 ~]$ oc get nodes/worker-0-1 -o json | jq .spec.taints null [kni@cert-rhosp-02 ~]$ oc logs pod/fence-agents-remediation-controller-manager-5f76bb6467-hlp9l 2025-09-09T20:28:07.352575845Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-09T20:28:07.35281602Z INFO setup Go OS/Arch: linux/amd64 2025-09-09T20:28:07.352981984Z INFO setup Operator Version: bd73055e 2025-09-09T20:28:07.352994499Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-09-09T20:28:07.353004716Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-09-09T20:28:07.353033798Z INFO setup HTTP/2 for webhooks disabled 2025-09-09T20:28:07.361979594Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 29} 2025-09-09T20:28:07.362010601Z INFO setup out-of-service taint is supported on this cluster 2025-09-09T20:28:07.362064806Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-09T20:28:07.362102943Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-09T20:28:07.362188226Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-09T20:28:07.362239702Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T20:28:07.362287464Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T20:28:07.362308391Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T20:28:07.362343682Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-09T20:28:07.362375072Z INFO setup starting manager 2025-09-09T20:28:07.36251047Z INFO controller-runtime.metrics Starting metrics server 2025-09-09T20:28:07.362543759Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-09T20:28:07.362592386Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-09T20:28:07.362605697Z INFO controller-runtime.webhook Starting webhook server I0909 20:28:07.362718 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-09T20:28:07.36281205Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-09T20:28:07.36287289Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-09T20:28:07.362998418Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0909 20:28:23.891456 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-09T20:28:23.89150491Z DEBUG events fence-agents-remediation-controller-manager-5f76bb6467-hlp9l_a5626dc2-b9ec-4927-83d3-6ae052a59bcc became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"9bcd0029-2dec-4ea9-a827-779095b67b9d","apiVersion":"coordination.k8s.io/v1","resourceVersion":"1168775"}, "reason": "LeaderElection"} ... ... ... 2025-09-09T21:05:18.925667038Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:05:18.925692478Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found{"CR Name": "worker-0-1-95lvh", "CR Namespace": "openshift-workload-availability"} 2025-09-09T21:05:18.925697327Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:05:18.925706534Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-95lvh","uid":"e4fa030a-bec2-4ec5-b09e-a5ae12868bf8","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1185106"}, "reason": "RemoveFinalizer"} 2025-09-09T21:08:22.160751746Z INFO fenceagentsremediationtemplate-resource default {"name": "fenceagentsremediationtemplate-test"} 2025-09-09T21:09:46.253631004Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:09:46.253674512Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T21:09:46.259611173Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-1-m6kmh"} 2025-09-09T21:09:46.262519726Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-1-m6kmh"} 2025-09-09T21:09:46.262542284Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-09T21:09:46.262541332Z"} 2025-09-09T21:09:46.262704102Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-m6kmh","uid":"83b80776-21e7-4aea-99e2-1079e8aab092","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1187763"}, "reason": "RemediationStarted"} 2025-09-09T21:09:46.262777989Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-m6kmh","uid":"83b80776-21e7-4aea-99e2-1079e8aab092","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1187763"}, "reason": "AddFinalizer"} 2025-09-09T21:09:46.467405032Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:09:46.467477941Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:09:46.467490433Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T21:09:46.474731226Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-09T21:09:15Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-09T21:09:20Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-09T21:09:46Z"}]} 2025-09-09T21:09:46.474782475Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-1"} 2025-09-09T21:09:46.474824024Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1"} 2025-09-09T21:09:46.474879716Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1", "FAR uid": "83b80776-21e7-4aea-99e2-1079e8aab092", "Parameters": ["--password","--username","--action","--ip","--ipport","--lanplus"]} 2025-09-09T21:09:46.47528821Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"ba4e4add-a481-432f-bb60-35c8b0dbf33f","apiVersion":"v1","resourceVersion":"1187565"}, "reason": "AddRemediationTaint"} 2025-09-09T21:09:46.475307656Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-m6kmh","uid":"83b80776-21e7-4aea-99e2-1079e8aab092","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1187764"}, "reason": "FenceAgentExecuted"} 2025-09-09T21:09:46.475327095Z INFO executer fence agent start {"uid": "83b80776-21e7-4aea-99e2-1079e8aab092", "fence_agent": "fence_ipmilan", "retryCount": 1, "retryInterval": "1s", "timeout": "1s"} 2025-09-09T21:09:46.481303761Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:09:46.481356855Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:09:46.48136574Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T21:09:46.481399064Z INFO controllers.FenceAgentsRemediation A Fence Agent is already running {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1", "FAR uid": "83b80776-21e7-4aea-99e2-1079e8aab092"} 2025-09-09T21:09:46.491917063Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:09:47.476136886Z INFO executer command failed {"uid": "83b80776-21e7-4aea-99e2-1079e8aab092", "response": "", "errMessage": "", "err": "signal: killed"} 2025-09-09T21:09:47.476198288Z INFO executer fence agent done {"uid": "83b80776-21e7-4aea-99e2-1079e8aab092", "fence_agent": "fence_ipmilan", "stdout": "", "stderr": "", "err": "signal: killed"} 2025-09-09T21:09:47.476206344Z INFO executer fence agent context timed out 2025-09-09T21:09:47.476208619Z INFO executer updating status {"FAR uid": "83b80776-21e7-4aea-99e2-1079e8aab092"} 2025-09-09T21:09:47.476245736Z INFO executer Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "False", "succeededConditionStatus": "False", "reason": "FenceAgentFailed", "LastUpdateTime": "2025-09-09T21:09:47.476245018Z"} 2025-09-09T21:09:47.482232321Z INFO executer status updated {"FAR uid": "83b80776-21e7-4aea-99e2-1079e8aab092"} 2025-09-09T21:09:47.48228084Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:09:47.482398454Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T21:09:47.487676211Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:11:29.833429172Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:11:29.833480788Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-09T21:11:29.833503238Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-1-m6kmh"} 2025-09-09T21:11:29.833506919Z INFO controllers.FenceAgentsRemediation FAR didn't finish remediate the node {"CR Name": "worker-0-1-m6kmh", "processing condition": "False", "fenceAgentActionSucceeded condition": "False", "succeeded condition": "False"} 2025-09-09T21:11:29.83351381Z INFO executer cancelling fence agent routine {"uid": "83b80776-21e7-4aea-99e2-1079e8aab092"} 2025-09-09T21:11:29.839696077Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-09T21:09:20Z"}]} 2025-09-09T21:11:29.839806144Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-1"} 2025-09-09T21:11:29.840136228Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"ba4e4add-a481-432f-bb60-35c8b0dbf33f","apiVersion":"v1","resourceVersion":"1188528"}, "reason": "RemoveRemediationTaint"} 2025-09-09T21:11:29.847402385Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-1-m6kmh"} 2025-09-09T21:11:29.852674508Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-1-m6kmh"} 2025-09-09T21:11:29.852735036Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:11:29.852906581Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-09T21:11:29.852933489Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found{"CR Name": "worker-0-1-m6kmh", "CR Namespace": "openshift-workload-availability"} 2025-09-09T21:11:29.852940977Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-09T21:11:29.852764908Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-m6kmh","uid":"83b80776-21e7-4aea-99e2-1079e8aab092","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"1188530"}, "reason": "RemoveFinalizer"} [kni@cert-rhosp-02 ~]$