[kni@cert-rhosp-02 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.16.0-0.nightly-2025-09-06-014223 True False 12h Cluster version is 4.16.0-0.nightly-2025-09-06-014223 [kni@cert-rhosp-02 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name | grep fence-agents-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-6577bdfc69-7t6dx "worker-0-2" == pod/fence-agents-remediation-controller-manager-6577bdfc69-q87rc "worker-0-0" [kni@cert-rhosp-02 ~]$ oc get fartemplate -o yaml apiVersion: v1 items: [] kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get secret | grep test-far [kni@cert-rhosp-02 ~]$ cat shared_secret_names.yaml apiVersion: v1 kind: Secret stringData: '--password': password '--username': admin metadata: name: test-far-shared namespace: openshift-workload-availability type: Opaque [kni@cert-rhosp-02 ~]$ oc apply -f shared_secret_names.yaml secret/test-far-shared created [kni@cert-rhosp-02 ~]$ oc get secret | grep test-far test-far-shared Opaque 2 6s [kni@cert-rhosp-02 ~]$ oc get nhc -o yaml apiVersion: v1 items: [] kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get far No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get pods | grep fence fence-agents-remediation-controller-manager-6577bdfc69-7t6dx 2/2 Running 0 5m54s fence-agents-remediation-controller-manager-6577bdfc69-q87rc 2/2 Running 0 2m47s [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 12h v1.29.14+c68a663 worker-0-1 Ready worker 12h v1.29.14+c68a663 worker-0-2 Ready worker 12h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-1 -- chroot /host bash -c "systemctl stop kubelet" Temporary namespace openshift-debug-f6pfv is created for debugging node... Starting pod/worker-0-1-debug-n9rw5 ... To use host binaries, run `chroot /host` [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 12h v1.29.14+c68a663 worker-0-1 NotReady worker 12h v1.29.14+c68a663 worker-0-2 Ready worker 12h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ vi shared_secret_names.yaml [kni@cert-rhosp-02 ~]$ cat shared_secret_names.yaml apiVersion: v1 kind: Secret stringData: '--password': password '--username': admin metadata: name: test-far-shared namespace: openshift-workload-availability type: Opaque --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: name: worker-0-1 namespace: openshift-workload-availability spec: agent: fence_ipmilan retrycount: 5 retryinterval: 10s timeout: 300s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--ip': 192.168.123.1 '--lanplus': '' sharedSecretName: test-far-shared [kni@cert-rhosp-02 ~]$ oc apply -f shared_secret_names.yaml secret/test-far-shared configured fenceagentsremediation.fence-agents-remediation.medik8s.io/worker-0-1 created [kni@cert-rhosp-02 ~]$ oc get far -o yaml apiVersion: v1 items: - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","kind":"FenceAgentsRemediation","metadata":{"annotations":{},"name":"worker-0-1","namespace":"openshift-workload-availability"},"spec":{"agent":"fence_ipmilan","nodeparameters":{"--ipport":{"master-0-0":"6230","master-0-1":"6231","master-0-2":"6232","worker-0-0":"6233","worker-0-1":"6234","worker-0-2":"6235"}},"retrycount":5,"retryinterval":"10s","sharedSecretName":"test-far-shared","sharedparameters":{"--action":"reboot","--ip":"192.168.123.1","--lanplus":""},"timeout":"300s"}} creationTimestamp: "2025-09-08T09:33:52Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generation: 2 name: worker-0-1 namespace: openshift-workload-availability resourceVersion: "326028" uid: 66f5fece-0890-480c-9f3f-3158df28a73a spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" remediationStrategy: ResourceDeletion retrycount: 5 retryinterval: 10s sharedSecretName: test-far-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" timeout: 5m0s status: conditions: - lastTransitionTime: "2025-09-08T09:34:00Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "False" type: Processing - lastTransitionTime: "2025-09-08T09:33:57Z" message: FAR taint was added and the fence agent command has been created and executed successfully reason: FenceAgentSucceeded status: "True" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-08T09:34:00Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "True" type: Succeeded lastUpdateTime: "2025-09-08T09:34:00Z" kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 12h v1.29.14+c68a663 worker-0-1 Ready worker 12h v1.29.14+c68a663 worker-0-2 Ready worker 12h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc logs pod/fence-agents-remediation-controller-manager-6577bdfc69-7t6dx 2025-09-08T09:26:08.879294082Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-08T09:26:08.879380082Z INFO setup Go OS/Arch: linux/amd64 2025-09-08T09:26:08.879383339Z INFO setup Operator Version: 6cd59c9a 2025-09-08T09:26:08.879385138Z INFO setup Git Commit: 6cd59c9a8b6d88630f8d80303ac285e5334e0b41 2025-09-08T09:26:08.879387051Z INFO setup Build Date: 2025-09-01T19:15:36+00:00 2025-09-08T09:26:08.879401604Z INFO setup HTTP/2 for webhooks disabled 2025-09-08T09:26:08.886901422Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 29} 2025-09-08T09:26:08.886925633Z INFO setup out-of-service taint is supported on this cluster 2025-09-08T09:26:08.886964967Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-08T09:26:08.88699517Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-08T09:26:08.887062588Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-08T09:26:08.887120912Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-08T09:26:08.887151602Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-08T09:26:08.887180227Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-08T09:26:08.88720325Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-08T09:26:08.887221407Z INFO setup starting manager 2025-09-08T09:26:08.887318065Z INFO controller-runtime.metrics Starting metrics server 2025-09-08T09:26:08.887356408Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-08T09:26:08.887390186Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-08T09:26:08.88740773Z INFO controller-runtime.webhook Starting webhook server I0908 09:26:08.887443 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-08T09:26:08.887649602Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-08T09:26:08.887714639Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-08T09:26:08.887768807Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0908 09:29:26.490472 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-08T09:29:26.490519812Z DEBUG events fence-agents-remediation-controller-manager-6577bdfc69-7t6dx_b99edc4d-7184-443e-8f60-d6bf62c4bb3f became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"9bcd0029-2dec-4ea9-a827-779095b67b9d","apiVersion":"coordination.k8s.io/v1","resourceVersion":"324036"}, "reason": "LeaderElection"} 2025-09-08T09:29:26.490688122Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-09-08T09:29:26.490712168Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-09-08T09:29:26.592542235Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} 2025-09-08T09:33:52.479028164Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-08T09:33:52.479066074Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-08T09:33:52.588260883Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-1"} 2025-09-08T09:33:52.591672238Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-1"} 2025-09-08T09:33:52.591707159Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1","uid":"66f5fece-0890-480c-9f3f-3158df28a73a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"325758"}, "reason": "RemediationStarted"} 2025-09-08T09:33:52.591758162Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-08T09:33:52.59175667Z"} 2025-09-08T09:33:52.591877877Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1","uid":"66f5fece-0890-480c-9f3f-3158df28a73a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"325758"}, "reason": "AddFinalizer"} 2025-09-08T09:33:52.797330555Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-08T09:33:52.797406295Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-08T09:33:52.797419316Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-08T09:33:52.804753158Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-08T09:18:25Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-08T09:18:31Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-08T09:33:52Z"}]} 2025-09-08T09:33:52.804952745Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-1"} 2025-09-08T09:33:52.805013558Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1"} 2025-09-08T09:33:52.805081681Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"ba4e4add-a481-432f-bb60-35c8b0dbf33f","apiVersion":"v1","resourceVersion":"325115"}, "reason": "AddRemediationTaint"} 2025-09-08T09:33:52.905363649Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--password"} 2025-09-08T09:33:52.905396819Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--username"} 2025-09-08T09:33:52.905431843Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1", "FAR uid": "66f5fece-0890-480c-9f3f-3158df28a73a", "ParametersError": "json: unsupported type: iter.Seq[github.com/medik8s/fence-agents-remediation/api/v1alpha1.ParameterName]"} 2025-09-08T09:33:52.905544198Z INFO executer fence agent start {"uid": "66f5fece-0890-480c-9f3f-3158df28a73a", "fence_agent": "fence_ipmilan", "retryCount": 5, "retryInterval": "10s", "timeout": "5m0s"} 2025-09-08T09:33:52.905591949Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1","uid":"66f5fece-0890-480c-9f3f-3158df28a73a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"325760"}, "reason": "FenceAgentExecuted"} 2025-09-08T09:33:52.910761615Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-08T09:33:52.910810597Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-08T09:33:52.91082859Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-08T09:33:52.910873654Z INFO controllers.FenceAgentsRemediation A Fence Agent is already running {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1", "FAR uid": "66f5fece-0890-480c-9f3f-3158df28a73a"} 2025-09-08T09:33:52.917102058Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-08T09:33:57.85527242Z INFO executer command completed {"uid": "66f5fece-0890-480c-9f3f-3158df28a73a", "response": "Success: Rebooted\n", "errMessage": "", "err": null} 2025-09-08T09:33:57.855379678Z INFO executer fence agent done {"uid": "66f5fece-0890-480c-9f3f-3158df28a73a", "fence_agent": "fence_ipmilan", "stdout": "Success: Rebooted\n", "stderr": "", "err": null} 2025-09-08T09:33:57.855396225Z INFO executer updating status {"FAR uid": "66f5fece-0890-480c-9f3f-3158df28a73a"} 2025-09-08T09:33:57.855468267Z INFO executer Updating Status Condition {"processingConditionStatus": "", "fenceAgentActionSucceededConditionStatus": "True", "succeededConditionStatus": "", "reason": "FenceAgentSucceeded", "LastUpdateTime": "2025-09-08T09:33:57.855467609Z"} 2025-09-08T09:33:57.855551101Z DEBUG events [remediation] Fence agent was succeeded {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1","uid":"66f5fece-0890-480c-9f3f-3158df28a73a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"325760"}, "reason": "FenceAgentSucceeded"} 2025-09-08T09:33:57.861203803Z INFO executer status updated {"FAR uid": "66f5fece-0890-480c-9f3f-3158df28a73a"} 2025-09-08T09:33:57.861476755Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-08T09:33:57.86149614Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-08T09:33:57.86155522Z INFO controllers.FenceAgentsRemediation Remediation strategy is ResourceDeletion which explicitly deletes resources - manually deleting workload {"Node Name": "worker-0-1"} 2025-09-08T09:33:57.861652499Z DEBUG events [remediation] Manually delete pods from the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"ba4e4add-a481-432f-bb60-35c8b0dbf33f","apiVersion":"v1","resourceVersion":"325764"}, "reason": "DeleteResources"} 2025-09-08T09:33:57.96276103Z INFO commons-resource starting to delete pods {"node name": "worker-0-1"} 2025-09-08T09:34:00.116210912Z INFO commons-resource done deleting pods {"node name": "worker-0-1"} 2025-09-08T09:34:00.116249262Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "", "succeededConditionStatus": "True", "reason": "RemediationFinishedSuccessfully", "LastUpdateTime": "2025-09-08T09:34:00.116247891Z"} 2025-09-08T09:34:00.116266758Z INFO executer cancelling fence agent routine {"uid": "66f5fece-0890-480c-9f3f-3158df28a73a"} 2025-09-08T09:34:00.11627496Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR has completed to remediate the node {"Node Name": "worker-0-1"} 2025-09-08T09:34:00.116469305Z DEBUG events [remediation] Unhealthy node remediation was completed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"ba4e4add-a481-432f-bb60-35c8b0dbf33f","apiVersion":"v1","resourceVersion":"325764"}, "reason": "NodeRemediationCompleted"} 2025-09-08T09:34:00.116493624Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1","uid":"66f5fece-0890-480c-9f3f-3158df28a73a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"325829"}, "reason": "RemediationFinished"} 2025-09-08T09:34:00.321956968Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-08T09:34:00.322021313Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-08T09:34:00.322034189Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-08T09:34:00.328123111Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile