[kni@titan45 ~]$ oc project openshift-workload-availability Now using project "openshift-workload-availability" on server "https://api.ocp-edge-cluster-0.qe.lab.redhat.com:6443". [kni@titan45 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.19.0-0.nightly-2025-09-11-184658 True False 3h10m Cluster version is 4.19.0-0.nightly-2025-09-11-184658 [kni@titan45 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded self-node-remediation.v0.10.2 Self Node Remediation Operator 0.10.2 self-node-remediation.v0.10.1 Succeeded [kni@titan45 ~]$ PODS=$(oc get pods -o name -n openshift-workload-availability | grep fence-agents-remediation-controller-manager) [kni@titan45 ~]$ echo $PODS pod/fence-agents-remediation-controller-manager-7bfcd47b77-k2dzj pod/fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9 [kni@titan45 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-7bfcd47b77-k2dzj "worker-0-1" == pod/fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9 "worker-0-0" [kni@titan45 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 3h59m v1.32.8 worker-0-1 Ready worker 3h59m v1.32.8 worker-0-2 NotReady worker 3h59m v1.32.8 [kni@titan45 ~]$ vi test.yaml [kni@titan45 ~]$ cat test.yaml apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-far-worker spec: maxUnhealthy: 1 remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/control-plane operator: DoesNotExist values: [] - key: node-role.kubernetes.io/master operator: DoesNotExist values: [] unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability spec: template: spec: agent: fence_ipmilan retrycount: 5 retryinterval: 10s timeout: 300s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 sharedSecretName: test-far-shared [kni@titan45 ~]$ oc apply -f test.yaml nodehealthcheck.remediation.medik8s.io/nhc-far-worker unchanged fenceagentsremediationtemplate.fence-agents-remediation.medik8s.io/fenceagentsremediationtemplate-test created [kni@titan45 ~]$ oc get far -o yaml apiVersion: v1 items: - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-2 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-12T13:04:46Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-2- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-2-j9q9l namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-far-worker uid: d19d0666-38f2-4a86-bc2d-093898e81bd4 resourceVersion: "114925" uid: ddc8fd9f-c217-4ade-9c0e-6ff69585cca6 spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" remediationStrategy: ResourceDeletion retrycount: 5 retryinterval: 10s sharedSecretName: test-far-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" timeout: 5m0s status: conditions: - lastTransitionTime: "2025-09-12T13:04:46Z" message: FAR CR was found, its name matches one of the cluster nodes, and a finalizer was set to the CR reason: RemediationStarted status: "True" type: Processing - lastTransitionTime: "2025-09-12T13:04:46Z" message: FAR CR was found, its name matches one of the cluster nodes, and a finalizer was set to the CR reason: RemediationStarted status: Unknown type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-12T13:04:46Z" message: FAR CR was found, its name matches one of the cluster nodes, and a finalizer was set to the CR reason: RemediationStarted status: Unknown type: Succeeded lastUpdateTime: "2025-09-12T13:04:46Z" kind: List metadata: resourceVersion: "" [kni@titan45 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 4h5m v1.32.8 worker-0-1 Ready worker 4h5m v1.32.8 worker-0-2 Ready worker 4h5m v1.32.8 [kni@titan45 ~]$ oc get pod/fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9 -o yaml apiVersion: v1 kind: Pod metadata: annotations: alm-examples: |- [ { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediation", "metadata": { "name": "worker-1" }, "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } }, { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediationTemplate", "metadata": { "name": "fenceagentsremediationtemplate-default" }, "spec": { "template": { "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } } } } ] capabilities: Basic Install categories: OpenShift Optional containerImage: "" createdAt: "2025-09-08 09:19:00" description: Fence Agents Remediation Operator uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent, and then evicting workloads from the unhealthy node. features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" features.operators.openshift.io/csi: "false" features.operators.openshift.io/disconnected: "true" features.operators.openshift.io/fips-compliant: "true" features.operators.openshift.io/proxy-aware: "false" features.operators.openshift.io/tls-profiles: "false" features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.128.2.21/23"],"mac_address":"0a:58:0a:80:02:15","gateway_ips":["10.128.2.1"],"routes":[{"dest":"10.128.0.0/14","nextHop":"10.128.2.1"},{"dest":"172.30.0.0/16","nextHop":"10.128.2.1"},{"dest":"169.254.0.5/32","nextHop":"10.128.2.1"},{"dest":"100.64.0.0/16","nextHop":"10.128.2.1"}],"ip_address":"10.128.2.21/23","gateway_ip":"10.128.2.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.128.2.21" ], "mac": "0a:58:0a:80:02:15", "default": true, "dns": {} }] kubectl.kubernetes.io/default-container: manager olm.operatorGroup: node-healthcheck-operator-operatorgroup olm.operatorNamespace: openshift-workload-availability olm.skipRange: '>=0.5.0 <0.6.0' olm.targetNamespaces: "" olmcahash: 60cec09bb67d540196b69f2149d2ee59ca6f730d3827fa4dcd64dfbdd14f9cc1 openshift.io/scc: restricted-v2 operatorframework.io/properties: '{"properties":[{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediation","version":"v1alpha1"}},{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediationTemplate","version":"v1alpha1"}},{"type":"olm.package","value":{"packageName":"fence-agents-remediation","version":"0.6.0"}}]}' operatorframework.io/suggested-namespace: openshift-workload-availability operatorframework.io/suggested-namespace-template: '{"kind":"Namespace","apiVersion":"v1","metadata":{"name":"openshift-workload-availability","annotations":{"openshift.io/node-selector":""}}}' operators.openshift.io/valid-subscription: '["OpenShift Kubernetes Engine", "OpenShift Container Platform", "OpenShift Platform Plus"]' operators.operatorframework.io/builder: operator-sdk-v1.32.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/medik8s/fence-agents-remediation seccomp.security.alpha.kubernetes.io/pod: runtime/default support: Red Hat creationTimestamp: "2025-09-12T12:53:07Z" generateName: fence-agents-remediation-controller-manager-7bfcd47b77- labels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager pod-template-hash: 7bfcd47b77 name: fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9 namespace: openshift-workload-availability ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: fence-agents-remediation-controller-manager-7bfcd47b77 uid: 7e60e044-a20e-4e34-8ebf-1f21eb79a592 resourceVersion: "110581" uid: b5205fe5-68f6-4b2b-aec8-099f562f7cee spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: matchExpressions: - key: control-plane operator: In values: - controller-manager - key: app.kubernetes.io/name operator: In values: - fence-agents-remediation-operator topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - --secure-listen-address=0.0.0.0:8443 - --http2-disable - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 env: - name: OPERATOR_CONDITION_NAME value: fence-agents-remediation.v0.6.0 image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 imagePullPolicy: IfNotPresent name: kube-rbac-proxy ports: - containerPort: 8443 name: https protocol: TCP resources: limits: cpu: 500m memory: 128Mi requests: cpu: 5m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsUser: 1000740000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-bd6g8 readOnly: true - args: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect command: - /manager env: - name: DEPLOYMENT_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: OPERATOR_CONDITION_NAME value: fence-agents-remediation.v0.6.0 image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8081 scheme: HTTP initialDelaySeconds: 15 periodSeconds: 20 successThreshold: 1 timeoutSeconds: 1 name: manager readinessProbe: failureThreshold: 3 httpGet: path: /readyz port: 8081 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: 500m memory: 512Mi requests: cpu: 10m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsUser: 1000740000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-bd6g8 readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: fence-agents-remediation-controller-manager-dockercfg-95xpl nodeName: worker-0-0 preemptionPolicy: PreemptLowerPriority priority: 2000000000 priorityClassName: system-cluster-critical restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000740000 runAsNonRoot: true seLinuxOptions: level: s0:c27,c19 seccompProfile: type: RuntimeDefault serviceAccount: fence-agents-remediation-controller-manager serviceAccountName: fence-agents-remediation-controller-manager terminationGracePeriodSeconds: 10 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: apiservice-cert secret: defaultMode: 420 items: - key: tls.crt path: apiserver.crt - key: tls.key path: apiserver.key secretName: fence-agents-remediation-controller-manager-service-cert - name: webhook-cert secret: defaultMode: 420 items: - key: tls.crt path: tls.crt - key: tls.key path: tls.key secretName: fence-agents-remediation-controller-manager-service-cert - name: kube-api-access-bd6g8 projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2025-09-12T12:53:08Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2025-09-12T12:53:07Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2025-09-12T12:53:19Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2025-09-12T12:53:19Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2025-09-12T12:53:07Z" status: "True" type: PodScheduled containerStatuses: - containerID: cri-o://371d31592ac78585f77701ebf227a4e9364c6f967fc40553e69761918d988fca image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 imageID: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:61bcf4eb286aed402e5db1c10efbce58816db1f67ef7c0464d201a9cb001a24e lastState: {} name: kube-rbac-proxy ready: true restartCount: 0 started: true state: running: startedAt: "2025-09-12T12:53:08Z" volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-bd6g8 readOnly: true recursiveReadOnly: Disabled - containerID: cri-o://5617c1efbad0988ada22ec62992a4ee0a2a232f0367f069b31ade8c895541745 image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 imageID: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 lastState: {} name: manager ready: true restartCount: 0 started: true state: running: startedAt: "2025-09-12T12:53:08Z" volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-bd6g8 readOnly: true recursiveReadOnly: Disabled hostIP: 192.168.123.83 hostIPs: - ip: 192.168.123.83 phase: Running podIP: 10.128.2.21 podIPs: - ip: 10.128.2.21 qosClass: Burstable startTime: "2025-09-12T12:53:07Z" FAR logs: 2025-09-12T12:53:08.524544618Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-12T12:53:08.524858082Z INFO setup Go OS/Arch: linux/amd64 2025-09-12T12:53:08.524867645Z INFO setup Operator Version: bd73055e 2025-09-12T12:53:08.524899355Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-09-12T12:53:08.524908832Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-09-12T12:53:08.524964073Z INFO setup HTTP/2 for webhooks disabled 2025-09-12T12:53:08.59661479Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 32} 2025-09-12T12:53:08.596918062Z INFO setup out-of-service taint is supported on this cluster 2025-09-12T12:53:08.59714752Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-12T12:53:08.59731168Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-12T12:53:08.597674534Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-12T12:53:08.597853421Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598187196Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598294914Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598448954Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-12T12:53:08.598594664Z INFO setup starting manager 2025-09-12T12:53:08.599056239Z INFO controller-runtime.metrics Starting metrics server 2025-09-12T12:53:08.599266017Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-12T12:53:08.599524825Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-12T12:53:08.599483554Z INFO controller-runtime.webhook Starting webhook server I0912 12:53:08.599866 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-12T12:53:08.600499824Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-12T12:53:08.600706999Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-12T12:53:08.600948661Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0912 12:53:31.430420 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-12T12:53:31.431268869Z DEBUG events fence-agents-remediation-controller-manager-7bfcd47b77-vs4c9_718e85aa-6178-494b-a22b-4c07a7110f26 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"abd01c01-e4b5-4cf6-92c8-9d02571ac460","apiVersion":"coordination.k8s.io/v1","resourceVersion":"110668"}, "reason": "LeaderElection"} 2025-09-12T12:53:31.432312416Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-09-12T12:53:31.432375753Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-09-12T12:53:31.539140902Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} ... ... ... 2025-09-12T13:10:15.576848386Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:10:15.577076116Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-2-j9q9l"} 2025-09-12T13:10:15.577126869Z INFO controllers.FenceAgentsRemediation FAR didn't finish remediate the node {"CR Name": "worker-0-2-j9q9l", "processing condition": "True", "fenceAgentActionSucceeded condition": "Unknown", "succeeded condition": "Unknown"} 2025-09-12T13:10:15.612967899Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-12T12:41:04Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-12T12:41:11Z"}]} 2025-09-12T13:10:15.613346967Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-2"} 2025-09-12T13:10:15.614067346Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"115897"}, "reason": "RemoveRemediationTaint"} 2025-09-12T13:10:15.668152427Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2-j9q9l"} 2025-09-12T13:10:15.693366558Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-2-j9q9l"} 2025-09-12T13:10:15.693813783Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.694028288Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.694222844Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-2-j9q9l", "CR Namespace": "openshift-workload-availability"} 2025-09-12T13:10:15.694259164Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.694216901Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-j9q9l","uid":"ddc8fd9f-c217-4ade-9c0e-6ff69585cca6","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"116619"}, "reason": "RemoveFinalizer"} 2025-09-12T13:10:15.787477431Z INFO fenceagentsremediation-resource validate create {"name": "worker-0-2-t66k4"} 2025-09-12T13:10:15.809122558Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.809245392Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:10:15.828296264Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2-t66k4"} 2025-09-12T13:10:15.856531022Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-2-t66k4"} 2025-09-12T13:10:15.856818926Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-12T13:10:15.8568139Z"} 2025-09-12T13:10:15.85689691Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-t66k4","uid":"306b530d-e6a9-4c55-8274-c44a8e5681bd","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"116628"}, "reason": "RemediationStarted"} 2025-09-12T13:10:15.857166937Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-t66k4","uid":"306b530d-e6a9-4c55-8274-c44a8e5681bd","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"116628"}, "reason": "AddFinalizer"} 2025-09-12T13:10:15.880587342Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.881055423Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.881136274Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:10:15.917381174Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-12T12:41:04Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-12T12:41:11Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-12T13:10:15Z"}]} 2025-09-12T13:10:15.917542732Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-2"} 2025-09-12T13:10:15.917775573Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2"} 2025-09-12T13:10:15.918075181Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--password"} 2025-09-12T13:10:15.918106601Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--username"} 2025-09-12T13:10:15.918116887Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"116620"}, "reason": "AddRemediationTaint"} 2025-09-12T13:10:15.918273015Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd", "Parameters": ["--action","--ip","--lanplus","--ipport","--username","--password"]} 2025-09-12T13:10:15.919247301Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-t66k4","uid":"306b530d-e6a9-4c55-8274-c44a8e5681bd","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"116632"}, "reason": "FenceAgentExecuted"} 2025-09-12T13:10:15.918758943Z INFO executer fence agent start {"uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd", "fence_agent": "fence_ipmilan", "retryCount": 5, "retryInterval": "10s", "timeout": "5m0s"} 2025-09-12T13:10:15.951930755Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.952144973Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:10:15.952192493Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:10:15.952345267Z INFO controllers.FenceAgentsRemediation A Fence Agent is already running {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd"} 2025-09-12T13:10:15.982234211Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:10:22.492073836Z INFO executer command completed {"uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd", "response": "Success: Rebooted\n", "errMessage": "", "err": null} 2025-09-12T13:10:22.492440674Z INFO executer fence agent done {"uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd", "fence_agent": "fence_ipmilan", "stdout": "Success: Rebooted\n", "stderr": "", "err": null} 2025-09-12T13:10:22.492570358Z INFO executer updating status {"FAR uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd"} 2025-09-12T13:10:22.493113203Z INFO executer Updating Status Condition {"processingConditionStatus": "", "fenceAgentActionSucceededConditionStatus": "True", "succeededConditionStatus": "", "reason": "FenceAgentSucceeded", "LastUpdateTime": "2025-09-12T13:10:22.493106664Z"} 2025-09-12T13:10:22.493220191Z DEBUG events [remediation] Fence agent was succeeded {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-t66k4","uid":"306b530d-e6a9-4c55-8274-c44a8e5681bd","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"116632"}, "reason": "FenceAgentSucceeded"} 2025-09-12T13:10:22.514025745Z INFO executer status updated {"FAR uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd"} 2025-09-12T13:10:22.514100801Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:10:22.514492303Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:10:22.514789234Z INFO controllers.FenceAgentsRemediation Remediation strategy is ResourceDeletion which explicitly deletes resources - manually deleting workload {"Node Name": "worker-0-2-t66k4"} 2025-09-12T13:10:22.515353293Z DEBUG events [remediation] Manually delete pods from the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"116634"}, "reason": "DeleteResources"} 2025-09-12T13:10:22.61739323Z INFO commons-resource starting to delete pods {"node name": "worker-0-2"} 2025-09-12T13:10:25.180021467Z INFO commons-resource done deleting pods {"node name": "worker-0-2"} 2025-09-12T13:10:25.180077956Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "", "succeededConditionStatus": "True", "reason": "RemediationFinishedSuccessfully", "LastUpdateTime": "2025-09-12T13:10:25.18007606Z"} 2025-09-12T13:10:25.18010578Z INFO executer cancelling fence agent routine {"uid": "306b530d-e6a9-4c55-8274-c44a8e5681bd"} 2025-09-12T13:10:25.180118975Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR has completed to remediate the node {"Node Name": "worker-0-2"} 2025-09-12T13:10:25.180486827Z DEBUG events [remediation] Unhealthy node remediation was completed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"116634"}, "reason": "NodeRemediationCompleted"} 2025-09-12T13:10:25.180553113Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-t66k4","uid":"306b530d-e6a9-4c55-8274-c44a8e5681bd","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"116692"}, "reason": "RemediationFinished"} 2025-09-12T13:10:25.199722291Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:10:25.200094225Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:10:25.20022978Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:10:25.213959666Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:12:05.350404481Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:12:05.350557812Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-12T13:12:05.350689692Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-2-t66k4"} 2025-09-12T13:12:05.418982797Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-12T12:41:11Z"}]} 2025-09-12T13:12:05.419142166Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-2"} 2025-09-12T13:12:05.419455924Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"33b1fdb5-b748-44b3-a34a-efe8e0fb4cc6","apiVersion":"v1","resourceVersion":"117492"}, "reason": "RemoveRemediationTaint"} 2025-09-12T13:12:05.465068817Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2-t66k4"} 2025-09-12T13:12:05.501857425Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-2-t66k4"} 2025-09-12T13:12:05.502071344Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-12T13:12:05.502148837Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-t66k4","uid":"306b530d-e6a9-4c55-8274-c44a8e5681bd","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"117503"}, "reason": "RemoveFinalizer"} 2025-09-12T13:12:05.502939927Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-12T13:12:05.503184958Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-2-t66k4", "CR Namespace": "openshift-workload-availability"} 2025-09-12T13:12:05.503315709Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile