[kni@cert-rhosp-02 ~]$ oc project openshift-workload-availability oc get clusterversion oc get csv PODS=$(oc get pods -o name | grep fence-agents-remediation-controller-manager) echo $PODS # PODS=$(oc get pods -o name | grep self-node) for p in $PODS; do echo "== $p" oc get "$p" -o json | jq .spec.nodeName done Already on project "openshift-workload-availability" on server "https://api.ocp-edge-cluster-0.qe.lab.redhat.com:6443". NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.18.0-0.nightly-2025-09-03-101304 True False 11h Cluster version is 4.18.0-0.nightly-2025-09-03-101304 NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded pod/fence-agents-remediation-controller-manager-7cb55f4fcd-5b8f9 pod/fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk == pod/fence-agents-remediation-controller-manager-7cb55f4fcd-5b8f9 "worker-0-2" == pod/fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk "worker-0-0" [kni@cert-rhosp-02 ~]$ oc get bmh -n openshift-machine-api NAME STATE CONSUMER ONLINE ERROR AGE openshift-master-0-0 provisioned ocp-edge-cluster-0-9wrvs-master-0 true 12h openshift-master-0-1 provisioned ocp-edge-cluster-0-9wrvs-master-1 true 12h openshift-master-0-2 provisioned ocp-edge-cluster-0-9wrvs-master-2 true 12h openshift-worker-0-0 provisioned ocp-edge-cluster-0-9wrvs-worker-0-6z8jb true 12h openshift-worker-0-1 provisioned ocp-edge-cluster-0-9wrvs-worker-0-6vrd9 true 12h openshift-worker-0-2 provisioned ocp-edge-cluster-0-9wrvs-worker-0-qdr29 true 12h openshift-worker-0-3 provisioned ocp-edge-cluster-0-9wrvs-worker-0-pllch true 12h [kni@cert-rhosp-02 ~]$ oc get bmh openshift-worker-0-3 -n openshift-machine-api -o json | jq .metadata.annotations null [kni@cert-rhosp-02 ~]$ oc patch bmh openshift-worker-0-3 -n openshift-machine-api --type='merge' -p '{"metadata":{"annotations":{"baremetalhost.metal3.io/detached":"far-ipmi-off"}}}' baremetalhost.metal3.io/openshift-worker-0-3 patched [kni@cert-rhosp-02 ~]$ oc get bmh openshift-worker-0-3 -n openshift-machine-api -o json | jq .metadata.annotations { "baremetalhost.metal3.io/detached": "far-ipmi-off" } [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-3 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-xdg6p is created for debugging node... Starting pod/worker-0-3-debug-p4c57 ... To use host binaries, run `chroot /host` Thu Sep 11 09:46:18 UTC 2025 2025-09-11 09:32:35 Removing debug pod ... Temporary namespace openshift-debug-xdg6p was removed. [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-2 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-9dnn7 is created for debugging node... Starting pod/worker-0-2-debug-xftzv ... To use host binaries, run `chroot /host` Thu Sep 11 09:56:39 UTC 2025 2025-09-11 05:53:35 Removing debug pod ... Temporary namespace openshift-debug-9dnn7 was removed. [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-1 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-295bm is created for debugging node... Starting pod/worker-0-1-debug-lwc2n ... To use host binaries, run `chroot /host` Thu Sep 11 09:56:52 UTC 2025 2025-09-10 21:20:20 Removing debug pod ... Temporary namespace openshift-debug-295bm was removed. [kni@cert-rhosp-02 ~]$ vi test.yaml [kni@cert-rhosp-02 ~]$ cat test.yaml apiVersion: v1 kind: Secret stringData: '--password': password '--username': admin metadata: name: test-far-shared namespace: openshift-workload-availability type: Opaque --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability spec: template: spec: agent: fence_ipmilan retrycount: 5 retryinterval: 10s timeout: 300s nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' worker-0-3: '6236' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 sharedSecretName: test-far-shared --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-far-worker spec: maxUnhealthy: 1 remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/control-plane operator: DoesNotExist values: [] - key: node-role.kubernetes.io/master operator: DoesNotExist values: [] unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready [kni@cert-rhosp-02 ~]$ oc get far No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get fartemplate No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get nhc No resources found [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml secret/test-far-shared created fenceagentsremediationtemplate.fence-agents-remediation.medik8s.io/fenceagentsremediationtemplate-test created nodehealthcheck.remediation.medik8s.io/nhc-far-worker created [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 12h v1.31.11 worker-0-1 Ready worker 12h v1.31.11 worker-0-2 Ready worker 12h v1.31.11 worker-0-3 Ready worker 12h v1.31.11 [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 12h v1.31.11 worker-0-1 NotReady worker 12h v1.31.11 worker-0-2 NotReady worker 12h v1.31.11 worker-0-3 NotReady worker 12h v1.31.11 [kni@cert-rhosp-02 ~]$ oc get nhc -o json | jq .items[0].status.unhealthyNodes [ { "name": "worker-0-2" }, { "name": "worker-0-3" }, { "name": "worker-0-1" } ] [kni@cert-rhosp-02 ~]$ vi test.yaml [kni@cert-rhosp-02 ~]$ cat test.yaml --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-far-worker spec: maxUnhealthy: 2 remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/control-plane operator: DoesNotExist values: [] - key: node-role.kubernetes.io/master operator: DoesNotExist values: [] unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml nodehealthcheck.remediation.medik8s.io/nhc-far-worker configured [kni@cert-rhosp-02 ~]$ vi test.yaml [kni@cert-rhosp-02 ~]$ cat test.yaml --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-far-worker spec: maxUnhealthy: 3 remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplate-test namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/control-plane operator: DoesNotExist values: [] - key: node-role.kubernetes.io/master operator: DoesNotExist values: [] unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml nodehealthcheck.remediation.medik8s.io/nhc-far-worker configured [kni@cert-rhosp-02 ~]$ oc get far NAME AGE worker-0-1-6lcbd 15s worker-0-2-vtfhp 15s worker-0-3-vks87 14s [kni@cert-rhosp-02 ~]$ oc get far -o yaml apiVersion: v1 items: - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-1 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-11T10:14:02Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-1- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-1-6lcbd namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-far-worker uid: 8fd17847-eb17-4e87-aba6-8770ba64a27b resourceVersion: "245529" uid: 128cdad0-679f-424b-b4c6-8da3d65e6ef4 spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" worker-0-3: "6236" remediationStrategy: ResourceDeletion retrycount: 5 retryinterval: 10s sharedSecretName: test-far-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" timeout: 5m0s status: conditions: - lastTransitionTime: "2025-09-11T10:14:11Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "False" type: Processing - lastTransitionTime: "2025-09-11T10:14:08Z" message: FAR taint was added and the fence agent command has been created and executed successfully reason: FenceAgentSucceeded status: "True" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-11T10:14:11Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "True" type: Succeeded lastUpdateTime: "2025-09-11T10:14:11Z" - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-2 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-11T10:14:02Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-2- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-2-vtfhp namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-far-worker uid: 8fd17847-eb17-4e87-aba6-8770ba64a27b resourceVersion: "245842" uid: b28ddf12-0382-439d-929c-4732aac8593a spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" worker-0-3: "6236" remediationStrategy: ResourceDeletion retrycount: 5 retryinterval: 10s sharedSecretName: test-far-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" timeout: 5m0s status: conditions: - lastTransitionTime: "2025-09-11T10:14:19Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "False" type: Processing - lastTransitionTime: "2025-09-11T10:14:08Z" message: FAR taint was added and the fence agent command has been created and executed successfully reason: FenceAgentSucceeded status: "True" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-11T10:14:19Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "True" type: Succeeded lastUpdateTime: "2025-09-11T10:14:19Z" - apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediation metadata: annotations: remediation.medik8s.io/node-name: worker-0-3 remediation.medik8s.io/template-name: fenceagentsremediationtemplate-test creationTimestamp: "2025-09-11T10:14:03Z" finalizers: - fence-agents-remediation.medik8s.io/far-finalizer generateName: worker-0-3- generation: 1 labels: app.kubernetes.io/part-of: node-healthcheck-controller name: worker-0-3-vks87 namespace: openshift-workload-availability ownerReferences: - apiVersion: remediation.medik8s.io/v1alpha1 controller: false kind: NodeHealthCheck name: nhc-far-worker uid: 8fd17847-eb17-4e87-aba6-8770ba64a27b resourceVersion: "245692" uid: 878bdfc2-52cf-4803-8b2a-460c61ac6c85 spec: agent: fence_ipmilan nodeparameters: --ipport: master-0-0: "6230" master-0-1: "6231" master-0-2: "6232" worker-0-0: "6233" worker-0-1: "6234" worker-0-2: "6235" worker-0-3: "6236" remediationStrategy: ResourceDeletion retrycount: 5 retryinterval: 10s sharedSecretName: test-far-shared sharedparameters: --action: reboot --ip: 192.168.123.1 --lanplus: "" timeout: 5m0s status: conditions: - lastTransitionTime: "2025-09-11T10:14:15Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "False" type: Processing - lastTransitionTime: "2025-09-11T10:14:08Z" message: FAR taint was added and the fence agent command has been created and executed successfully reason: FenceAgentSucceeded status: "True" type: FenceAgentActionSucceeded - lastTransitionTime: "2025-09-11T10:14:15Z" message: The unhealthy node was fully remediated (it was tainted, fenced using the fence agent and all the node resources have been deleted) reason: RemediationFinishedSuccessfully status: "True" type: Succeeded lastUpdateTime: "2025-09-11T10:14:15Z" kind: List metadata: resourceVersion: "" [kni@cert-rhosp-02 ~]$ oc get bmh openshift-worker-0-3 -n openshift-machine-api -o json | jq .metadata.annotations { "baremetalhost.metal3.io/detached": "far-ipmi-off" } [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 12h v1.31.11 worker-0-1 Ready worker 12h v1.31.11 worker-0-2 Ready worker 12h v1.31.11 worker-0-3 Ready worker 12h v1.31.11 [kni@cert-rhosp-02 ~]$ oc get pods | grep fenc fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk 2/2 Running 0 4h42m fence-agents-remediation-controller-manager-7cb55f4fcd-q742f 2/2 Running 0 10m [kni@cert-rhosp-02 ~]$ oc get pod fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk -o yaml apiVersion: v1 kind: Pod metadata: annotations: alm-examples: |- [ { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediation", "metadata": { "name": "worker-1" }, "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } }, { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediationTemplate", "metadata": { "name": "fenceagentsremediationtemplate-default" }, "spec": { "template": { "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } } } } ] capabilities: Basic Install categories: OpenShift Optional containerImage: "" createdAt: "2025-09-08 09:19:00" description: Fence Agents Remediation Operator uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent, and then evicting workloads from the unhealthy node. features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" features.operators.openshift.io/csi: "false" features.operators.openshift.io/disconnected: "true" features.operators.openshift.io/fips-compliant: "true" features.operators.openshift.io/proxy-aware: "false" features.operators.openshift.io/tls-profiles: "false" features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" k8s.ovn.org/pod-networks: '{"default":{"ip_addresses":["10.130.2.49/23"],"mac_address":"0a:58:0a:82:02:31","gateway_ips":["10.130.2.1"],"routes":[{"dest":"10.128.0.0/14","nextHop":"10.130.2.1"},{"dest":"172.30.0.0/16","nextHop":"10.130.2.1"},{"dest":"169.254.0.5/32","nextHop":"10.130.2.1"},{"dest":"100.64.0.0/16","nextHop":"10.130.2.1"}],"ip_address":"10.130.2.49/23","gateway_ip":"10.130.2.1","role":"primary"}}' k8s.v1.cni.cncf.io/network-status: |- [{ "name": "ovn-kubernetes", "interface": "eth0", "ips": [ "10.130.2.49" ], "mac": "0a:58:0a:82:02:31", "default": true, "dns": {} }] kubectl.kubernetes.io/default-container: manager olm.operatorGroup: openshift-workload-availability-hjmlc olm.operatorNamespace: openshift-workload-availability olm.skipRange: '>=0.5.0 <0.6.0' olm.targetNamespaces: "" olmcahash: 03b815d79160c81fd9e605ce4f21a3ef6b508554084e9079477b423e2808dc1b openshift.io/scc: restricted-v2 operatorframework.io/properties: '{"properties":[{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediation","version":"v1alpha1"}},{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediationTemplate","version":"v1alpha1"}},{"type":"olm.package","value":{"packageName":"fence-agents-remediation","version":"0.6.0"}}]}' operatorframework.io/suggested-namespace: openshift-workload-availability operatorframework.io/suggested-namespace-template: '{"kind":"Namespace","apiVersion":"v1","metadata":{"name":"openshift-workload-availability","annotations":{"openshift.io/node-selector":""}}}' operators.openshift.io/valid-subscription: '["OpenShift Kubernetes Engine", "OpenShift Container Platform", "OpenShift Platform Plus"]' operators.operatorframework.io/builder: operator-sdk-v1.32.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/medik8s/fence-agents-remediation seccomp.security.alpha.kubernetes.io/pod: runtime/default support: Red Hat creationTimestamp: "2025-09-11T05:34:06Z" generateName: fence-agents-remediation-controller-manager-7cb55f4fcd- labels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager pod-template-hash: 7cb55f4fcd name: fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk namespace: openshift-workload-availability ownerReferences: - apiVersion: apps/v1 blockOwnerDeletion: true controller: true kind: ReplicaSet name: fence-agents-remediation-controller-manager-7cb55f4fcd uid: 6ff59928-638d-4cb5-8747-6c14c1bdc9fc resourceVersion: "155450" uid: ccb653bf-2fce-4d79-a68a-3d285dcea561 spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: matchExpressions: - key: control-plane operator: In values: - controller-manager - key: app.kubernetes.io/name operator: In values: - fence-agents-remediation-operator topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - --secure-listen-address=0.0.0.0:8443 - --http2-disable - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 env: - name: OPERATOR_CONDITION_NAME value: fence-agents-remediation.v0.6.0 image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 imagePullPolicy: IfNotPresent name: kube-rbac-proxy ports: - containerPort: 8443 name: https protocol: TCP resources: limits: cpu: 500m memory: 128Mi requests: cpu: 5m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsUser: 1000740000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kwmql readOnly: true - args: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect command: - /manager env: - name: DEPLOYMENT_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: OPERATOR_CONDITION_NAME value: fence-agents-remediation.v0.6.0 image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8081 scheme: HTTP initialDelaySeconds: 15 periodSeconds: 20 successThreshold: 1 timeoutSeconds: 1 name: manager readinessProbe: failureThreshold: 3 httpGet: path: /readyz port: 8081 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: 500m memory: 512Mi requests: cpu: 10m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL runAsUser: 1000740000 terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kwmql readOnly: true dnsPolicy: ClusterFirst enableServiceLinks: true imagePullSecrets: - name: fence-agents-remediation-controller-manager-dockercfg-8nzws nodeName: worker-0-0 preemptionPolicy: PreemptLowerPriority priority: 2000000000 priorityClassName: system-cluster-critical restartPolicy: Always schedulerName: default-scheduler securityContext: fsGroup: 1000740000 runAsNonRoot: true seLinuxOptions: level: s0:c27,c19 seccompProfile: type: RuntimeDefault serviceAccount: fence-agents-remediation-controller-manager serviceAccountName: fence-agents-remediation-controller-manager terminationGracePeriodSeconds: 10 tolerations: - effect: NoExecute key: node.kubernetes.io/not-ready operator: Exists tolerationSeconds: 300 - effect: NoExecute key: node.kubernetes.io/unreachable operator: Exists tolerationSeconds: 300 - effect: NoSchedule key: node.kubernetes.io/memory-pressure operator: Exists volumes: - name: apiservice-cert secret: defaultMode: 420 items: - key: tls.crt path: apiserver.crt - key: tls.key path: apiserver.key secretName: fence-agents-remediation-controller-manager-service-cert - name: webhook-cert secret: defaultMode: 420 items: - key: tls.crt path: tls.crt - key: tls.key path: tls.key secretName: fence-agents-remediation-controller-manager-service-cert - name: kube-api-access-kwmql projected: defaultMode: 420 sources: - serviceAccountToken: expirationSeconds: 3607 path: token - configMap: items: - key: ca.crt path: ca.crt name: kube-root-ca.crt - downwardAPI: items: - fieldRef: apiVersion: v1 fieldPath: metadata.namespace path: namespace - configMap: items: - key: service-ca.crt path: service-ca.crt name: openshift-service-ca.crt status: conditions: - lastProbeTime: null lastTransitionTime: "2025-09-11T05:36:15Z" status: "True" type: PodReadyToStartContainers - lastProbeTime: null lastTransitionTime: "2025-09-11T05:34:06Z" status: "True" type: Initialized - lastProbeTime: null lastTransitionTime: "2025-09-11T05:36:26Z" status: "True" type: Ready - lastProbeTime: null lastTransitionTime: "2025-09-11T05:36:26Z" status: "True" type: ContainersReady - lastProbeTime: null lastTransitionTime: "2025-09-11T05:34:06Z" status: "True" type: PodScheduled containerStatuses: - containerID: cri-o://e494f026c695aba6d8eddce83a0ef609d70cac9363ddcd7a109d9f5ec693c92d image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 imageID: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:61bcf4eb286aed402e5db1c10efbce58816db1f67ef7c0464d201a9cb001a24e lastState: {} name: kube-rbac-proxy ready: true restartCount: 0 started: true state: running: startedAt: "2025-09-11T05:34:12Z" volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kwmql readOnly: true recursiveReadOnly: Disabled - containerID: cri-o://cbd81827213e266ba0186a350c12f8d8ac7a5d8c44ed556c7a31c22d2fd68bb9 image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 imageID: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 lastState: {} name: manager ready: true restartCount: 0 started: true state: running: startedAt: "2025-09-11T05:36:14Z" volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - mountPath: /var/run/secrets/kubernetes.io/serviceaccount name: kube-api-access-kwmql readOnly: true recursiveReadOnly: Disabled hostIP: 192.168.123.134 hostIPs: - ip: 192.168.123.134 phase: Running podIP: 10.130.2.49 podIPs: - ip: 10.130.2.49 qosClass: Burstable startTime: "2025-09-11T05:34:06Z" [kni@cert-rhosp-02 ~]$ oc get csv fence-agents-remediation.v0.6.0 -o yaml apiVersion: operators.coreos.com/v1alpha1 kind: ClusterServiceVersion metadata: annotations: alm-examples: |- [ { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediation", "metadata": { "name": "worker-1" }, "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } }, { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediationTemplate", "metadata": { "name": "fenceagentsremediationtemplate-default" }, "spec": { "template": { "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } } } } ] capabilities: Basic Install categories: OpenShift Optional containerImage: "" createdAt: "2025-09-08 09:19:00" description: Fence Agents Remediation Operator uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent, and then evicting workloads from the unhealthy node. features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" features.operators.openshift.io/csi: "false" features.operators.openshift.io/disconnected: "true" features.operators.openshift.io/fips-compliant: "true" features.operators.openshift.io/proxy-aware: "false" features.operators.openshift.io/tls-profiles: "false" features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" olm.operatorGroup: openshift-workload-availability-hjmlc olm.operatorNamespace: openshift-workload-availability olm.skipRange: '>=0.5.0 <0.6.0' olm.targetNamespaces: "" operatorframework.io/properties: '{"properties":[{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediation","version":"v1alpha1"}},{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediationTemplate","version":"v1alpha1"}},{"type":"olm.package","value":{"packageName":"fence-agents-remediation","version":"0.6.0"}}]}' operatorframework.io/suggested-namespace: openshift-workload-availability operatorframework.io/suggested-namespace-template: '{"kind":"Namespace","apiVersion":"v1","metadata":{"name":"openshift-workload-availability","annotations":{"openshift.io/node-selector":""}}}' operators.openshift.io/valid-subscription: '["OpenShift Kubernetes Engine", "OpenShift Container Platform", "OpenShift Platform Plus"]' operators.operatorframework.io/builder: operator-sdk-v1.32.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/medik8s/fence-agents-remediation support: Red Hat creationTimestamp: "2025-09-11T05:34:02Z" finalizers: - operators.coreos.com/csv-cleanup generation: 1 labels: olm.managed: "true" operators.coreos.com/fence-agents-remediation.openshift-workload-availability: "" name: fence-agents-remediation.v0.6.0 namespace: openshift-workload-availability resourceVersion: "242958" uid: fb956e44-ff7c-4263-94e4-08fa9cabb634 spec: apiservicedefinitions: {} cleanup: enabled: false customresourcedefinitions: owned: - description: FenceAgentsRemediation is the Schema for the fenceagentsremediations API displayName: Fence Agents Remediation kind: FenceAgentsRemediation name: fenceagentsremediations.fence-agents-remediation.medik8s.io resources: - kind: FenceAgentsRemediation name: fenceagentsremediations version: v1alpha1 specDescriptors: - description: Agent is the name of fence agent that will be used. It should have a fence_ prefix. displayName: Agent path: agent - description: NodeSecretNames maps the node name to the Secret name which contains params relevant for that node. displayName: Node Secret Names path: nodeSecretNames - description: NodeParameters are passed to the fencing agent according to the node that is fenced, since they are node specific displayName: Node Parameters path: nodeparameters - description: RemediationStrategy is the remediation method for unhealthy nodes. Currently, it could be either "OutOfServiceTaint" or "ResourceDeletion". ResourceDeletion will iterate over all pods related to the unhealthy node and delete them. OutOfServiceTaint will add the out-of-service taint which is a new well-known taint "node.kubernetes.io/out-of-service" that enables automatic deletion of pv-attached pods on failed nodes, "out-of-service" taint is only supported on clusters with k8s version 1.26+ or OCP/OKD version 4.13+. displayName: Remediation Strategy path: remediationStrategy - description: RetryCount is the number of times the fencing agent will be executed displayName: Retry Count path: retrycount - description: RetryInterval is the interval between each fencing agent execution displayName: Retry Interval path: retryinterval - description: SharedSecretName is the name of the Secret which will contain params needed for FAR in order to remediate any node. Using this Secret is optional. displayName: Shared Secret Name path: sharedSecretName - description: SharedParameters are parameters common to all nodes displayName: Shared Parameters path: sharedparameters - description: Timeout is the timeout for each fencing agent execution displayName: Timeout path: timeout statusDescriptors: - description: 'Represents the observations of a FenceAgentsRemediation''s current state. Known .status.conditions.type are: "Processing", "FenceAgentActionSucceeded", and "Succeeded".' displayName: conditions path: conditions x-descriptors: - urn:alm:descriptor:io.kubernetes.conditions - description: LastUpdateTime is the last time the status was updated. displayName: Last Update Time path: lastUpdateTime version: v1alpha1 - description: FenceAgentsRemediationTemplate is the Schema for the fenceagentsremediationtemplates API displayName: Fence Agents Remediation Template kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io resources: - kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplates version: v1alpha1 specDescriptors: - description: Template defines the desired state of FenceAgentsRemediationTemplate displayName: Template path: template - description: Agent is the name of fence agent that will be used. It should have a fence_ prefix. displayName: Agent path: template.spec.agent - description: NodeSecretNames maps the node name to the Secret name which contains params relevant for that node. displayName: Node Secret Names path: template.spec.nodeSecretNames - description: NodeParameters are passed to the fencing agent according to the node that is fenced, since they are node specific displayName: Node Parameters path: template.spec.nodeparameters - description: RemediationStrategy is the remediation method for unhealthy nodes. Currently, it could be either "OutOfServiceTaint" or "ResourceDeletion". ResourceDeletion will iterate over all pods related to the unhealthy node and delete them. OutOfServiceTaint will add the out-of-service taint which is a new well-known taint "node.kubernetes.io/out-of-service" that enables automatic deletion of pv-attached pods on failed nodes, "out-of-service" taint is only supported on clusters with k8s version 1.26+ or OCP/OKD version 4.13+. displayName: Remediation Strategy path: template.spec.remediationStrategy - description: RetryCount is the number of times the fencing agent will be executed displayName: Retry Count path: template.spec.retrycount - description: RetryInterval is the interval between each fencing agent execution displayName: Retry Interval path: template.spec.retryinterval - description: SharedSecretName is the name of the Secret which will contain params needed for FAR in order to remediate any node. Using this Secret is optional. displayName: Shared Secret Name path: template.spec.sharedSecretName - description: SharedParameters are parameters common to all nodes displayName: Shared Parameters path: template.spec.sharedparameters - description: Timeout is the timeout for each fencing agent execution displayName: Timeout path: template.spec.timeout version: v1alpha1 description: | ### Introduction Fence Agents Remediation (FAR) is a Kubernetes operator that uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent and then evicting workloads from the unhealthy node. ### Compatibility FAR is one of the remediator operators by [Medik8s](https://www.medik8s.io/remediation/remediation/), such as [Self Node Remediation](https://github.com/medik8s/self-node-remediation) and [Machine Deletion Remediation](https://github.com/medik8s/machine-deletion-remediation), that were designed to run with the Node HealthCheck Operator [(NHC)](https://github.com/medik8s/node-healthcheck-operator) which detects an unhealthy node and creates remediation CR. It is recommended to use FAR with NHC for an easier and smoother experience by fully automating the remediation process, but it can be used as a standalone remediator for the more experienced user. ### Advantages - Robustness - FAR has direct feedback from the traditional Application Programming Interface (API) call (e.g., IPMI) about the result of the fence action without using the Kubernetes API. - Speed - FAR is rapid since it can reboot a node and receive an acknowledgment from the API call while other remediators might need to wait a safe time till they can expect the node to be rebooted. - Diversity - FAR includes several fence agents from a large known set of upstream fencing agents for bare metal servers, virtual machines, cloud platforms, etc. - Adjustability - FAR allows to set up different parameters for running the API call that remediates the node. displayName: Fence Agents Remediation Operator icon: - base64data: iVBORw0KGgoAAAANSUhEUgAACicAA ... ... ... AAAAAAAC8Baf84nL9t+JjAAAAAElFTkSuQmCC mediatype: image/png install: spec: clusterPermissions: - rules: - apiGroups: - "" resources: - namespaces verbs: - get - list - watch - apiGroups: - "" resources: - nodes verbs: - delete - get - list - update - watch - apiGroups: - "" resources: - pods verbs: - delete - deletecollection - get - list - update - watch - apiGroups: - "" resources: - pods/exec verbs: - create - apiGroups: - fence-agents-remediation.medik8s.io resources: - fenceagentsremediations verbs: - create - delete - get - list - patch - update - watch - apiGroups: - fence-agents-remediation.medik8s.io resources: - fenceagentsremediations/finalizers verbs: - update - apiGroups: - fence-agents-remediation.medik8s.io resources: - fenceagentsremediations/status verbs: - get - patch - update - apiGroups: - storage.k8s.io resources: - volumeattachments verbs: - delete - get - list - watch - apiGroups: - authentication.k8s.io resources: - tokenreviews verbs: - create - apiGroups: - authorization.k8s.io resources: - subjectaccessreviews verbs: - create serviceAccountName: fence-agents-remediation-controller-manager deployments: - label: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager name: fence-agents-remediation-controller-manager spec: replicas: 2 selector: matchLabels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager strategy: {} template: metadata: annotations: kubectl.kubernetes.io/default-container: manager creationTimestamp: null labels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: matchExpressions: - key: control-plane operator: In values: - controller-manager - key: app.kubernetes.io/name operator: In values: - fence-agents-remediation-operator topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - --secure-listen-address=0.0.0.0:8443 - --http2-disable - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 name: kube-rbac-proxy ports: - containerPort: 8443 name: https protocol: TCP resources: limits: cpu: 500m memory: 128Mi requests: cpu: 5m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL - args: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect command: - /manager env: - name: DEPLOYMENT_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 livenessProbe: httpGet: path: /healthz port: 8081 initialDelaySeconds: 15 periodSeconds: 20 name: manager readinessProbe: httpGet: path: /readyz port: 8081 initialDelaySeconds: 5 periodSeconds: 10 resources: limits: cpu: 500m memory: 512Mi requests: cpu: 10m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL priorityClassName: system-cluster-critical securityContext: runAsNonRoot: true seccompProfile: type: RuntimeDefault serviceAccountName: fence-agents-remediation-controller-manager terminationGracePeriodSeconds: 10 permissions: - rules: - apiGroups: - "" resources: - configmaps verbs: - get - list - watch - create - update - patch - delete - apiGroups: - coordination.k8s.io resources: - leases verbs: - get - list - watch - create - update - patch - delete - apiGroups: - "" resources: - events verbs: - create - patch - apiGroups: - "" resources: - secrets verbs: - get - list - watch serviceAccountName: fence-agents-remediation-controller-manager strategy: deployment installModes: - supported: false type: OwnNamespace - supported: false type: SingleNamespace - supported: false type: MultiNamespace - supported: true type: AllNamespaces keywords: - medik8s - fencing - auto-healing - recovery - high-availability - fence-agents - fence-agents-remediation - remediation - far - baremetal links: - name: Fence Agents Remediation url: https://access.redhat.com/documentation/en-us/workload_availability_for_red_hat_openshift/25.8/html/remediation_fencing_and_maintenance/about-remediation-fencing-maintenance - name: Source Code url: https://www.github.com/medik8s/fence-agents-remediation maintainers: - email: team-dragonfly@redhat.com name: Dragonfly Team maturity: alpha minKubeVersion: 1.24.0 provider: name: Red Hat url: https://www.redhat.com relatedImages: - image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 name: manager - image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 name: kube-rbac-proxy replaces: fence-agents-remediation.v0.5.1 version: 0.6.0 webhookdefinitions: - admissionReviewVersions: - v1 containerPort: 443 deploymentName: fence-agents-remediation-controller-manager failurePolicy: Fail generateName: mfenceagentsremediationtemplate.kb.io rules: - apiGroups: - fence-agents-remediation.medik8s.io apiVersions: - v1alpha1 operations: - CREATE - UPDATE resources: - fenceagentsremediationtemplates sideEffects: None targetPort: 9443 type: MutatingAdmissionWebhook webhookPath: /mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate - admissionReviewVersions: - v1 containerPort: 443 deploymentName: fence-agents-remediation-controller-manager failurePolicy: Fail generateName: vfenceagentsremediation.kb.io rules: - apiGroups: - fence-agents-remediation.medik8s.io apiVersions: - v1alpha1 operations: - CREATE - UPDATE resources: - fenceagentsremediations sideEffects: None targetPort: 9443 type: ValidatingAdmissionWebhook webhookPath: /validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation - admissionReviewVersions: - v1 containerPort: 443 deploymentName: fence-agents-remediation-controller-manager failurePolicy: Fail generateName: vfenceagentsremediationtemplate.kb.io rules: - apiGroups: - fence-agents-remediation.medik8s.io apiVersions: - v1alpha1 operations: - CREATE - UPDATE resources: - fenceagentsremediationtemplates sideEffects: None targetPort: 9443 type: ValidatingAdmissionWebhook webhookPath: /validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate status: certsLastUpdated: "2025-09-11T05:34:06Z" certsRotateAt: "2027-09-10T05:34:05Z" cleanup: {} conditions: - lastTransitionTime: "2025-09-11T05:34:05Z" lastUpdateTime: "2025-09-11T05:34:05Z" message: waiting for install components to report healthy phase: Installing reason: InstallSucceeded - lastTransitionTime: "2025-09-11T05:34:05Z" lastUpdateTime: "2025-09-11T05:34:06Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Installing reason: InstallWaiting - lastTransitionTime: "2025-09-11T05:36:26Z" lastUpdateTime: "2025-09-11T05:36:26Z" message: install strategy completed with no errors phase: Succeeded reason: InstallSucceeded - lastTransitionTime: "2025-09-11T07:25:30Z" lastUpdateTime: "2025-09-11T07:25:30Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Failed reason: ComponentUnhealthy - lastTransitionTime: "2025-09-11T07:25:30Z" lastUpdateTime: "2025-09-11T07:25:30Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Pending reason: NeedsReinstall - lastTransitionTime: "2025-09-11T07:25:30Z" lastUpdateTime: "2025-09-11T07:25:30Z" message: all requirements found, attempting install phase: InstallReady reason: AllRequirementsMet - lastTransitionTime: "2025-09-11T07:25:30Z" lastUpdateTime: "2025-09-11T07:25:30Z" message: waiting for install components to report healthy phase: Installing reason: InstallSucceeded - lastTransitionTime: "2025-09-11T07:25:30Z" lastUpdateTime: "2025-09-11T07:25:31Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Installing reason: InstallWaiting - lastTransitionTime: "2025-09-11T07:25:51Z" lastUpdateTime: "2025-09-11T07:25:51Z" message: install strategy completed with no errors phase: Succeeded reason: InstallSucceeded - lastTransitionTime: "2025-09-11T10:00:38Z" lastUpdateTime: "2025-09-11T10:00:38Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Failed reason: ComponentUnhealthy - lastTransitionTime: "2025-09-11T10:00:38Z" lastUpdateTime: "2025-09-11T10:00:38Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Pending reason: NeedsReinstall - lastTransitionTime: "2025-09-11T10:00:38Z" lastUpdateTime: "2025-09-11T10:00:38Z" message: all requirements found, attempting install phase: InstallReady reason: AllRequirementsMet - lastTransitionTime: "2025-09-11T10:00:38Z" lastUpdateTime: "2025-09-11T10:00:38Z" message: waiting for install components to report healthy phase: Installing reason: InstallSucceeded - lastTransitionTime: "2025-09-11T10:00:38Z" lastUpdateTime: "2025-09-11T10:00:39Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Installing reason: InstallWaiting - lastTransitionTime: "2025-09-11T10:05:37Z" lastUpdateTime: "2025-09-11T10:05:37Z" message: install timeout phase: Failed reason: InstallCheckFailed - lastTransitionTime: "2025-09-11T10:05:38Z" lastUpdateTime: "2025-09-11T10:05:38Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Pending reason: NeedsReinstall - lastTransitionTime: "2025-09-11T10:05:38Z" lastUpdateTime: "2025-09-11T10:05:38Z" message: all requirements found, attempting install phase: InstallReady reason: AllRequirementsMet - lastTransitionTime: "2025-09-11T10:05:39Z" lastUpdateTime: "2025-09-11T10:05:39Z" message: waiting for install components to report healthy phase: Installing reason: InstallSucceeded - lastTransitionTime: "2025-09-11T10:05:39Z" lastUpdateTime: "2025-09-11T10:05:39Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Installing reason: InstallWaiting - lastTransitionTime: "2025-09-11T10:06:14Z" lastUpdateTime: "2025-09-11T10:06:14Z" message: install strategy completed with no errors phase: Succeeded reason: InstallSucceeded lastTransitionTime: "2025-09-11T10:06:14Z" lastUpdateTime: "2025-09-11T10:06:14Z" message: install strategy completed with no errors phase: Succeeded reason: InstallSucceeded requirementStatus: - group: operators.coreos.com kind: ClusterServiceVersion message: CSV minKubeVersion (1.24.0) less than server version (v1.31.11) name: fence-agents-remediation.v0.6.0 status: Present version: v1alpha1 - group: apiextensions.k8s.io kind: CustomResourceDefinition message: CRD is present and Established condition is true name: fenceagentsremediations.fence-agents-remediation.medik8s.io status: Present uuid: db47f721-5346-4d9b-987d-a83e323d2626 version: v1 - group: apiextensions.k8s.io kind: CustomResourceDefinition message: CRD is present and Established condition is true name: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io status: Present uuid: 7fc63780-1ab3-46e8-9a3a-e12049295fad version: v1 - dependents: - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["get","list","watch"],"apiGroups":[""],"resources":["namespaces"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["delete","get","list","update","watch"],"apiGroups":[""],"resources":["nodes"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["delete","deletecollection","get","list","update","watch"],"apiGroups":[""],"resources":["pods"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create"],"apiGroups":[""],"resources":["pods/exec"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create","delete","get","list","patch","update","watch"],"apiGroups":["fence-agents-remediation.medik8s.io"],"resources":["fenceagentsremediations"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["update"],"apiGroups":["fence-agents-remediation.medik8s.io"],"resources":["fenceagentsremediations/finalizers"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["get","patch","update"],"apiGroups":["fence-agents-remediation.medik8s.io"],"resources":["fenceagentsremediations/status"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["delete","get","list","watch"],"apiGroups":["storage.k8s.io"],"resources":["volumeattachments"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create"],"apiGroups":["authentication.k8s.io"],"resources":["tokenreviews"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create"],"apiGroups":["authorization.k8s.io"],"resources":["subjectaccessreviews"]} status: Satisfied version: v1 group: "" kind: ServiceAccount message: "" name: fence-agents-remediation-controller-manager status: Present version: v1 [kni@cert-rhosp-02 ~]$ oc logs pod/fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk 2025-09-11T05:36:14.527263389Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-11T05:36:14.527370284Z INFO setup Go OS/Arch: linux/amd64 2025-09-11T05:36:14.527372974Z INFO setup Operator Version: bd73055e 2025-09-11T05:36:14.527374714Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-09-11T05:36:14.527381571Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-09-11T05:36:14.527397084Z INFO setup HTTP/2 for webhooks disabled 2025-09-11T05:36:14.534381248Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 31} 2025-09-11T05:36:14.534411167Z INFO setup out-of-service taint is supported on this cluster 2025-09-11T05:36:14.53444962Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-09-11T05:36:14.534489281Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-11T05:36:14.53456682Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-09-11T05:36:14.534610486Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T05:36:14.534640986Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T05:36:14.534665326Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T05:36:14.534722327Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-09-11T05:36:14.534741433Z INFO setup starting manager 2025-09-11T05:36:14.534854882Z INFO controller-runtime.metrics Starting metrics server 2025-09-11T05:36:14.534910429Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-11T05:36:14.534936735Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} 2025-09-11T05:36:14.534960837Z INFO controller-runtime.webhook Starting webhook server I0911 05:36:14.535054 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-09-11T05:36:14.535180059Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-11T05:36:14.535264355Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-11T05:36:14.535311607Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} 2025-09-11T05:52:48.927674701Z INFO fenceagentsremediation-resource validate create {"name": "worker-0-2"} 2025-09-11T05:52:49.036911335Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2"} 2025-09-11T06:15:36.245634525Z INFO fenceagentsremediation-resource validate create {"name": "worker-0-3"} 2025-09-11T06:16:04.404393393Z INFO fenceagentsremediation-resource validate create {"name": "worker-0-3"} 2025-09-11T06:18:33.650310447Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-3"} 2025-09-11T06:19:00.771986592Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-3"} 2025-09-11T07:08:02.527580959Z INFO fenceagentsremediation-resource validate create {"name": "worker-0-3"} I0911 07:25:50.256758 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-09-11T07:25:50.256780455Z DEBUG events fence-agents-remediation-controller-manager-7cb55f4fcd-hsdhk_e6b244eb-2ae9-4c2a-97f1-793e8e4af15b became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"730d14ab-c3d7-4c09-9fa2-a93678a846f9","apiVersion":"coordination.k8s.io/v1","resourceVersion":"191697"}, "reason": "LeaderElection"} 2025-09-11T07:25:50.256956339Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-09-11T07:25:50.256966493Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-09-11T07:25:50.358758916Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} 2025-09-11T09:58:15.831522717Z INFO fenceagentsremediationtemplate-resource default {"name": "fenceagentsremediationtemplate-test"} 2025-09-11T10:14:02.979190006Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:02.979235103Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:02.989590486Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-1-6lcbd"} 2025-09-11T10:14:02.989616996Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-11T10:14:02.989615664Z"} 2025-09-11T10:14:02.989670093Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245232"}, "reason": "RemediationStarted"} 2025-09-11T10:14:02.990408593Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245232"}, "reason": "AddFinalizer"} 2025-09-11T10:14:03.196049435Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.196136884Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.196169064Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:03.203191208Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-2-vtfhp"} 2025-09-11T10:14:03.203210232Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-11T10:14:03.203208855Z"} 2025-09-11T10:14:03.203217917Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245244"}, "reason": "RemediationStarted"} 2025-09-11T10:14:03.203347189Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245244"}, "reason": "AddFinalizer"} 2025-09-11T10:14:03.408930124Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.409042817Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.409072757Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:03.416654825Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-3-vks87"} 2025-09-11T10:14:03.416675798Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-09-11T10:14:03.416674697Z"} 2025-09-11T10:14:03.416739865Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245248"}, "reason": "RemediationStarted"} 2025-09-11T10:14:03.416765856Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245248"}, "reason": "AddFinalizer"} 2025-09-11T10:14:03.421424226Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.421475552Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.421699453Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:03.427827012Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T10:00:43Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T10:00:53Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T10:14:03Z"}]} 2025-09-11T10:14:03.427858495Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-1"} 2025-09-11T10:14:03.427877569Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1"} 2025-09-11T10:14:03.427898765Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--password"} 2025-09-11T10:14:03.427901962Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--username"} 2025-09-11T10:14:03.428343728Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-1", "FAR uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4", "Parameters": ["--ip","--lanplus","--ipport","--password","--username","--action"]} 2025-09-11T10:14:03.429964158Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"74a6b690-4210-4c25-8153-ecb6141b9e2c","apiVersion":"v1","resourceVersion":"245201"}, "reason": "AddRemediationTaint"} 2025-09-11T10:14:03.429992568Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245234"}, "reason": "FenceAgentExecuted"} 2025-09-11T10:14:03.429973377Z INFO executer fence agent start {"uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4", "fence_agent": "fence_ipmilan", "retryCount": 5, "retryInterval": "10s", "timeout": "5m0s"} 2025-09-11T10:14:03.447115353Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.447217122Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.447421241Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:03.454222053Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T10:00:38Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T10:01:03Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T10:14:03Z"}]} 2025-09-11T10:14:03.45426077Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-2"} 2025-09-11T10:14:03.454290157Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2"} 2025-09-11T10:14:03.454312139Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--username"} 2025-09-11T10:14:03.454315689Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--password"} 2025-09-11T10:14:03.454363031Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "b28ddf12-0382-439d-929c-4732aac8593a", "Parameters": ["--lanplus","--action","--ip","--ipport","--username","--password"]} 2025-09-11T10:14:03.454553061Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"82e78454-ed47-414e-b096-58a43499dc01","apiVersion":"v1","resourceVersion":"245131"}, "reason": "AddRemediationTaint"} 2025-09-11T10:14:03.454622902Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245246"}, "reason": "FenceAgentExecuted"} 2025-09-11T10:14:03.454848024Z INFO executer fence agent start {"uid": "b28ddf12-0382-439d-929c-4732aac8593a", "fence_agent": "fence_ipmilan", "retryCount": 5, "retryInterval": "10s", "timeout": "5m0s"} 2025-09-11T10:14:03.463997259Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.464278067Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:03.46432531Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:03.480601515Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-09-11T10:00:38Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T10:00:43Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-09-11T10:14:03Z"}]} 2025-09-11T10:14:03.480749132Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-3"} 2025-09-11T10:14:03.480808284Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-3"} 2025-09-11T10:14:03.480856877Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--username"} 2025-09-11T10:14:03.480879449Z INFO controllers.FenceAgentsRemediation found a value from secret {"secret name": "test-far-shared", "parameter name": "--password"} 2025-09-11T10:14:03.480875581Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-3","uid":"44efe16d-4075-4575-9502-b85c43a3dea3","apiVersion":"v1","resourceVersion":"245130"}, "reason": "AddRemediationTaint"} 2025-09-11T10:14:03.480973862Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-3", "FAR uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85", "Parameters": ["--lanplus","--action","--ipport","--username","--password","--ip"]} 2025-09-11T10:14:03.481194721Z INFO executer fence agent start {"uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85", "fence_agent": "fence_ipmilan", "retryCount": 5, "retryInterval": "10s", "timeout": "5m0s"} 2025-09-11T10:14:03.481333947Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245250"}, "reason": "FenceAgentExecuted"} 2025-09-11T10:14:03.490321784Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:08.67527645Z INFO executer command completed {"uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4", "response": "Success: Rebooted\n", "errMessage": "", "err": null} 2025-09-11T10:14:08.6753115Z INFO executer fence agent done {"uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4", "fence_agent": "fence_ipmilan", "stdout": "Success: Rebooted\n", "stderr": "", "err": null} 2025-09-11T10:14:08.675319476Z INFO executer updating status {"FAR uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4"} 2025-09-11T10:14:08.675384049Z INFO executer Updating Status Condition {"processingConditionStatus": "", "fenceAgentActionSucceededConditionStatus": "True", "succeededConditionStatus": "", "reason": "FenceAgentSucceeded", "LastUpdateTime": "2025-09-11T10:14:08.675382542Z"} 2025-09-11T10:14:08.675475998Z DEBUG events [remediation] Fence agent was succeeded {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245234"}, "reason": "FenceAgentSucceeded"} 2025-09-11T10:14:08.679467915Z INFO executer command completed {"uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85", "response": "Success: Rebooted\n", "errMessage": "", "err": null} 2025-09-11T10:14:08.679501933Z INFO executer fence agent done {"uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85", "fence_agent": "fence_ipmilan", "stdout": "Success: Rebooted\n", "stderr": "", "err": null} 2025-09-11T10:14:08.67950637Z INFO executer updating status {"FAR uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85"} 2025-09-11T10:14:08.679569109Z INFO executer Updating Status Condition {"processingConditionStatus": "", "fenceAgentActionSucceededConditionStatus": "True", "succeededConditionStatus": "", "reason": "FenceAgentSucceeded", "LastUpdateTime": "2025-09-11T10:14:08.679568346Z"} 2025-09-11T10:14:08.679650944Z DEBUG events [remediation] Fence agent was succeeded {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245250"}, "reason": "FenceAgentSucceeded"} 2025-09-11T10:14:08.680707359Z INFO executer status updated {"FAR uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4"} 2025-09-11T10:14:08.681576109Z INFO executer command completed {"uid": "b28ddf12-0382-439d-929c-4732aac8593a", "response": "Success: Rebooted\n", "errMessage": "", "err": null} 2025-09-11T10:14:08.681588736Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:08.681597987Z INFO executer fence agent done {"uid": "b28ddf12-0382-439d-929c-4732aac8593a", "fence_agent": "fence_ipmilan", "stdout": "Success: Rebooted\n", "stderr": "", "err": null} 2025-09-11T10:14:08.681603297Z INFO executer updating status {"FAR uid": "b28ddf12-0382-439d-929c-4732aac8593a"} 2025-09-11T10:14:08.681607569Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:08.681644327Z INFO controllers.FenceAgentsRemediation Remediation strategy is ResourceDeletion which explicitly deletes resources - manually deleting workload {"Node Name": "worker-0-1-6lcbd"} 2025-09-11T10:14:08.681646958Z INFO executer Updating Status Condition {"processingConditionStatus": "", "fenceAgentActionSucceededConditionStatus": "True", "succeededConditionStatus": "", "reason": "FenceAgentSucceeded", "LastUpdateTime": "2025-09-11T10:14:08.681646422Z"} 2025-09-11T10:14:08.681849374Z DEBUG events [remediation] Fence agent was succeeded {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245246"}, "reason": "FenceAgentSucceeded"} 2025-09-11T10:14:08.681858396Z DEBUG events [remediation] Manually delete pods from the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"74a6b690-4210-4c25-8153-ecb6141b9e2c","apiVersion":"v1","resourceVersion":"245252"}, "reason": "DeleteResources"} 2025-09-11T10:14:08.681986074Z INFO commons-resource starting to delete pods {"node name": "worker-0-1"} 2025-09-11T10:14:08.684927939Z INFO executer status updated {"FAR uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85"} 2025-09-11T10:14:08.687966649Z INFO executer status updated {"FAR uid": "b28ddf12-0382-439d-929c-4732aac8593a"} 2025-09-11T10:14:11.188783186Z INFO commons-resource done deleting pods {"node name": "worker-0-1"} 2025-09-11T10:14:11.188936815Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "", "succeededConditionStatus": "True", "reason": "RemediationFinishedSuccessfully", "LastUpdateTime": "2025-09-11T10:14:11.188934835Z"} 2025-09-11T10:14:11.188972883Z INFO executer cancelling fence agent routine {"uid": "128cdad0-679f-424b-b4c6-8da3d65e6ef4"} 2025-09-11T10:14:11.18899242Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR has completed to remediate the node {"Node Name": "worker-0-1"} 2025-09-11T10:14:11.189125175Z DEBUG events [remediation] Unhealthy node remediation was completed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"74a6b690-4210-4c25-8153-ecb6141b9e2c","apiVersion":"v1","resourceVersion":"245252"}, "reason": "NodeRemediationCompleted"} 2025-09-11T10:14:11.189171237Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245339"}, "reason": "RemediationFinished"} 2025-09-11T10:14:11.396226432Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:11.396357301Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:11.396373503Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:11.396433623Z INFO controllers.FenceAgentsRemediation Remediation strategy is ResourceDeletion which explicitly deletes resources - manually deleting workload {"Node Name": "worker-0-3-vks87"} 2025-09-11T10:14:11.396585979Z DEBUG events [remediation] Manually delete pods from the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-3","uid":"44efe16d-4075-4575-9502-b85c43a3dea3","apiVersion":"v1","resourceVersion":"245259"}, "reason": "DeleteResources"} 2025-09-11T10:14:11.396865912Z INFO commons-resource starting to delete pods {"node name": "worker-0-3"} 2025-09-11T10:14:15.18888944Z INFO commons-resource done deleting pods {"node name": "worker-0-3"} 2025-09-11T10:14:15.188922165Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "", "succeededConditionStatus": "True", "reason": "RemediationFinishedSuccessfully", "LastUpdateTime": "2025-09-11T10:14:15.188920983Z"} 2025-09-11T10:14:15.188944511Z INFO executer cancelling fence agent routine {"uid": "878bdfc2-52cf-4803-8b2a-460c61ac6c85"} 2025-09-11T10:14:15.188950887Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR has completed to remediate the node {"Node Name": "worker-0-3"} 2025-09-11T10:14:15.189101418Z DEBUG events [remediation] Unhealthy node remediation was completed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-3","uid":"44efe16d-4075-4575-9502-b85c43a3dea3","apiVersion":"v1","resourceVersion":"245259"}, "reason": "NodeRemediationCompleted"} 2025-09-11T10:14:15.189231713Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245341"}, "reason": "RemediationFinished"} 2025-09-11T10:14:15.395303861Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:15.395408989Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:15.395480052Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:15.39554572Z INFO controllers.FenceAgentsRemediation Remediation strategy is ResourceDeletion which explicitly deletes resources - manually deleting workload {"Node Name": "worker-0-2-vtfhp"} 2025-09-11T10:14:15.395655429Z DEBUG events [remediation] Manually delete pods from the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"82e78454-ed47-414e-b096-58a43499dc01","apiVersion":"v1","resourceVersion":"245255"}, "reason": "DeleteResources"} 2025-09-11T10:14:15.396440238Z INFO commons-resource starting to delete pods {"node name": "worker-0-2"} 2025-09-11T10:14:19.138242392Z INFO commons-resource done deleting pods {"node name": "worker-0-2"} 2025-09-11T10:14:19.138267889Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "", "succeededConditionStatus": "True", "reason": "RemediationFinishedSuccessfully", "LastUpdateTime": "2025-09-11T10:14:19.138266243Z"} 2025-09-11T10:14:19.138282432Z INFO executer cancelling fence agent routine {"uid": "b28ddf12-0382-439d-929c-4732aac8593a"} 2025-09-11T10:14:19.138289754Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR has completed to remediate the node {"Node Name": "worker-0-2"} 2025-09-11T10:14:19.138414933Z DEBUG events [remediation] Unhealthy node remediation was completed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"82e78454-ed47-414e-b096-58a43499dc01","apiVersion":"v1","resourceVersion":"245255"}, "reason": "NodeRemediationCompleted"} 2025-09-11T10:14:19.138446124Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"245343"}, "reason": "RemediationFinished"} 2025-09-11T10:14:19.144399113Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:19.144486558Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:19.144508921Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:19.150116268Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:19.150493396Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:19.150592466Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:19.156663975Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:14:19.15672149Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:14:19.156735721Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:14:19.162403378Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:15:22.459378413Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:15:22.459410485Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:15:22.459438072Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-2-vtfhp"} 2025-09-11T10:15:22.467284899Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-09-11T10:01:03Z"}]} 2025-09-11T10:15:22.467332155Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-2"} 2025-09-11T10:15:22.470278996Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"82e78454-ed47-414e-b096-58a43499dc01","apiVersion":"v1","resourceVersion":"246659"}, "reason": "RemoveRemediationTaint"} 2025-09-11T10:15:22.480552003Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-2-vtfhp"} 2025-09-11T10:15:22.48061205Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:15:22.480654506Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:15:22.480755163Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"246723"}, "reason": "RemoveFinalizer"} 2025-09-11T10:15:22.48083483Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-2-vtfhp", "CR Namespace": "openshift-workload-availability"} 2025-09-11T10:15:22.48084253Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:15:24.224261294Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:15:24.224291289Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:15:24.224320281Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-3-vks87"} 2025-09-11T10:15:24.229889751Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/not-ready","effect":"NoExecute","timeAdded":"2025-09-11T10:15:23Z"}]} 2025-09-11T10:15:24.230012555Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-3"} 2025-09-11T10:15:24.230217071Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-3","uid":"44efe16d-4075-4575-9502-b85c43a3dea3","apiVersion":"v1","resourceVersion":"246906"}, "reason": "RemoveRemediationTaint"} 2025-09-11T10:15:24.241969495Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-3-vks87"} 2025-09-11T10:15:24.242020731Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:15:24.242069187Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:15:24.242089691Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-3-vks87", "CR Namespace": "openshift-workload-availability"} 2025-09-11T10:15:24.24209338Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:15:24.242103173Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"246914"}, "reason": "RemoveFinalizer"} 2025-09-11T10:15:25.035948834Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:15:25.035986399Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-09-11T10:15:25.036023948Z INFO controllers.FenceAgentsRemediation CR's deletion timestamp is not zero, and FAR finalizer exists {"CR Name": "worker-0-1-6lcbd"} 2025-09-11T10:15:25.04366311Z INFO taints Taint was removed {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/not-ready","effect":"NoExecute","timeAdded":"2025-09-11T10:15:23Z"}]} 2025-09-11T10:15:25.043714533Z INFO controllers.FenceAgentsRemediation FAR remediation taint was removed {"Node Name": "worker-0-1"} 2025-09-11T10:15:25.044027369Z DEBUG events [remediation] Remediation taint was removed {"type": "Normal", "object": {"kind":"Node","name":"worker-0-1","uid":"74a6b690-4210-4c25-8153-ecb6141b9e2c","apiVersion":"v1","resourceVersion":"246972"}, "reason": "RemoveRemediationTaint"} 2025-09-11T10:15:25.054931008Z INFO controllers.FenceAgentsRemediation Finalizer was removed {"CR Name": "worker-0-1-6lcbd"} 2025-09-11T10:15:25.054978126Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-09-11T10:15:25.055002433Z DEBUG events [remediation] Finalizer was removed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"246976"}, "reason": "RemoveFinalizer"} 2025-09-11T10:15:25.055673001Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-09-11T10:15:25.055699449Z INFO controllers.FenceAgentsRemediation FenceAgentsRemediation CR was not found {"CR Name": "worker-0-1-6lcbd", "CR Namespace": "openshift-workload-availability"} 2025-09-11T10:15:25.055702781Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-1 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-9pkf9 is created for debugging node... Starting pod/worker-0-1-debug-8z2km ... To use host binaries, run `chroot /host` Thu Sep 11 10:25:43 UTC 2025 2025-09-11 10:14:12 Removing debug pod ... Temporary namespace openshift-debug-9pkf9 was removed. [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-2 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-29f87 is created for debugging node... Starting pod/worker-0-2-debug-vqq7k ... To use host binaries, run `chroot /host` Thu Sep 11 10:26:09 UTC 2025 2025-09-11 10:14:12 Removing debug pod ... Temporary namespace openshift-debug-29f87 was removed. [kni@cert-rhosp-02 ~]$ oc debug node/worker-0-3 -- chroot /host bash -c "date & uptime -s" Temporary namespace openshift-debug-r9g6j is created for debugging node... Starting pod/worker-0-3-debug-wppxb ... To use host binaries, run `chroot /host` Thu Sep 11 10:26:16 UTC 2025 2025-09-11 10:14:12 Removing debug pod ... Temporary namespace openshift-debug-r9g6j was removed. NHc Logs: 2025-09-11T05:28:57.09374197Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-09-11T05:28:57.093844696Z INFO setup Go OS/Arch: linux/amd64 2025-09-11T05:28:57.093847903Z INFO setup Operator Version: 2ef588d 2025-09-11T05:28:57.093849693Z INFO setup Git Commit: 2ef588d65de4087c46447401cd6e757459d79210 2025-09-11T05:28:57.093851764Z INFO setup Build Date: 2025-09-01T17:55:11+00:00 2025-09-11T05:28:57.093853548Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-09-11T05:28:57.115829707Z INFO setup supported control plane topology {"topology": "HighlyAvailable"} 2025-09-11T05:28:57.115868481Z INFO setup Cluster capabilities {"IsOnOpenshift": true, "HasMachineAPI": true} 2025-09-11T05:28:57.11618186Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "remediation.medik8s.io/v1alpha1, Kind=NodeHealthCheck", "path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-09-11T05:28:57.116281857Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-09-11T05:28:57.11632777Z INFO setup starting manager 2025-09-11T05:28:57.116389099Z INFO controller-runtime.metrics Starting metrics server 2025-09-11T05:28:57.116475646Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-09-11T05:28:57.116498267Z INFO controller-runtime.webhook Starting webhook server 2025-09-11T05:28:57.116517422Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2025-09-11T05:28:57.11674311Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-09-11T05:28:57.116845565Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-09-11T05:28:57.116935837Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0911 05:28:57.217529 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/e1f13584.medik8s.io... I0911 05:28:57.223726 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/e1f13584.medik8s.io 2025-09-11T05:28:57.22379214Z DEBUG events node-healthcheck-controller-manager-69489fd9c8-gvb6c_11dff404-9134-4f52-a5db-6fba44c52c00 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"e1f13584.medik8s.io","uid":"b63f2a69-17f5-41a2-8cf1-28717d233797","apiVersion":"coordination.k8s.io/v1","resourceVersion":"152291"}, "reason": "LeaderElection"} 2025-09-11T05:28:57.224090973Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1alpha1.NodeHealthCheck"} 2025-09-11T05:28:57.224138254Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "channel source: 0xc000706690"} 2025-09-11T05:28:57.224150639Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1.Node"} 2025-09-11T05:28:57.224169649Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.Machine"} 2025-09-11T05:28:57.224153033Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "channel source: 0xc0007060e0"} 2025-09-11T05:28:57.224183141Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1.Node"} 2025-09-11T05:28:57.224191649Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.MachineHealthCheck"} I0911 05:28:57.225490 1 shared_informer.go:313] Waiting for caches to sync for feature gate accessor 2025-09-11T05:28:57.324893008Z INFO MHCChecker found termination handler MHC, will ignore Nodes with Terminating condition 2025-09-11T05:28:57.32498527Z INFO MHCChecker MHC Checker status changed, notifying NHC controller 2025-09-11T05:28:57.324911423Z INFO Starting Controller {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck"} 2025-09-11T05:28:57.325056961Z INFO Starting workers {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "worker count": 1} 2025-09-11T05:28:57.32494725Z INFO Starting Controller {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck"} 2025-09-11T05:28:57.325093284Z INFO Starting workers {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "worker count": 1} 2025-09-11T05:28:57.325149698Z INFO adding all NHCs to reconcile queue for handling MHC event I0911 05:28:57.326144 1 shared_informer.go:320] Caches are synced for feature gate accessor I0911 05:28:57.326182 1 simple_featuregate_reader.go:171] Starting feature-gate-detector 2025-09-11T05:28:57.326317033Z INFO FeatureGateAccessor FeatureGates initialized I0911 05:28:57.326444 1 recorder_logging.go:49] &Event{ObjectMeta:{dummy.18642345cfbd44ab.d1e567b7 dummy 0 0001-01-01 00:00:00 +0000 UTC map[] map[] [] [] []},InvolvedObject:ObjectReference{Kind:Pod,Namespace:dummy,Name:dummy,UID:,APIVersion:v1,ResourceVersion:,FieldPath:,},Reason:FeatureGatesInitialized,Message:FeatureGates updated to featuregates.Features{Enabled:[]v1.FeatureGateName{"AWSEFSDriverVolumeMetrics", "AdminNetworkPolicy", "AlibabaPlatform", "AzureWorkloadIdentity", "BareMetalLoadBalancer", "BuildCSIVolumes", "ChunkSizeMiB", "CloudDualStackNodeIPs", "DisableKubeletCloudCredentialProviders", "GCPLabelsTags", "HardwareSpeed", "IngressControllerLBSubnetsAWS", "KMSv1", "ManagedBootImages", "ManagedBootImagesAWS", "MultiArchInstallAWS", "MultiArchInstallGCP", "NetworkDiagnosticsConfig", "NetworkLiveMigration", "NetworkSegmentation", "NewOLM", "NodeDisruptionPolicy", "OnClusterBuild", "PersistentIPsForVirtualization", "PrivateHostedZoneAWS", "SetEIPForNLBIngressController", "VSphereControlPlaneMachineSet", "VSphereDriverConfiguration", "VSphereMultiVCenters", "VSphereStaticIPs", "ValidatingAdmissionPolicy"}, Disabled:[]v1.FeatureGateName{"AWSClusterHostedDNS", "AdditionalRoutingCapabilities", "AutomatedEtcdBackup", "BootcNodeManagement", "CSIDriverSharedResource", "ClusterAPIInstall", "ClusterAPIInstallIBMCloud", "ClusterMonitoringConfig", "ConsolePluginContentSecurityPolicy", "DNSNameResolver", "DynamicResourceAllocation", "EtcdBackendQuota", "EventedPLEG", "Example", "ExternalOIDC", "ExternalOIDCWithUIDAndExtraClaimMappings", "GCPClusterHostedDNS", "GatewayAPI", "ImageStreamImportMode", "IngressControllerDynamicConfigurationManager", "InsightsConfig", "InsightsConfigAPI", "InsightsOnDemandDataGather", "InsightsRuntimeExtractor", "MachineAPIMigration", "MachineAPIOperatorDisableMachineHealthCheckController", "MachineAPIProviderOpenStack", "MachineConfigNodes", "MaxUnavailableStatefulSet", "MetricsCollectionProfiles", "MinimumKubeletVersion", "MixedCPUsAllocation", "MultiArchInstallAzure", "NodeSwap", "NutanixMultiSubnets", "OVNObservability", "OpenShiftPodSecurityAdmission", "PinnedImages", "PlatformOperators", "ProcMountType", "RouteAdvertisements", "RouteExternalCertificate", "ServiceAccountTokenNodeBinding", "SignatureStores", "SigstoreImageVerification", "TranslateStreamCloseWebsocketRequests", "UpgradeStatus", "UserNamespacesPodSecurityStandards", "UserNamespacesSupport", "VSphereMultiNetworks", "VolumeAttributesClass", "VolumeGroupSnapshot"}},Source:EventSource{Component:,Host:,},FirstTimestamp:2025-09-11 05:28:57.326339243 +0000 UTC m=+0.255929631,LastTimestamp:2025-09-11 05:28:57.326339243 +0000 UTC m=+0.255929631,Count:1,Type:Normal,EventTime:0001-01-01 00:00:00 +0000 UTC,Series:nil,Action:,Related:nil,ReportingController:,ReportingInstance:,} 2025-09-11T05:28:57.639962897Z INFO KubeAPIWarningLogger unknown field "spec.contentSecurityPolicy" 2025-09-11T05:28:57.640072189Z INFO console-plugin successfully created / updated console plugin resources 2025-09-11T09:58:15.847163475Z INFO nodehealthcheck-resource validate create {"name": "nhc-far-worker"} 2025-09-11T09:58:15.850566859Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T09:58:15.877771026Z INFO controllers.NodeHealthCheck enabling NHC, valid config, no conflicting MHC configured in the cluster {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T09:58:15.87792109Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-11T09:58:15.877956653Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "FenceAgentsRemediationTemplate"} 2025-09-11T09:58:15.877967268Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-09-11T09:58:15.877971987Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "FenceAgentsRemediation"} 2025-09-11T09:58:15.87795098Z DEBUG events [remediation] No issues found, NodeHealthCheck is enabled. {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"239655"}, "reason": "Enabled"} 2025-09-11T09:58:15.887980078Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} time="2025-09-11T09:58:15Z" level=info msg="invalidating lease" time="2025-09-11T09:58:15Z" level=info msg="getting lease" 2025-09-11T09:58:16.089353918Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T09:58:16Z" level=info msg="invalidating lease" time="2025-09-11T09:58:16Z" level=info msg="getting lease" 2025-09-11T09:58:16.09212189Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} time="2025-09-11T09:58:16Z" level=info msg="invalidating lease" time="2025-09-11T09:58:16Z" level=info msg="getting lease" 2025-09-11T09:58:16.093693829Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} time="2025-09-11T09:58:16Z" level=info msg="invalidating lease" time="2025-09-11T09:58:16Z" level=info msg="getting lease" 2025-09-11T09:58:16.095673515Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":4,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2025-09-11T09:58:15Z\",\"message\":\"No issues found, NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"}],\"healthyNodes\":4,\"observedNodes\":4,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\"}}"} 2025-09-11T09:58:16.301065592Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-11T10:00:38.13571776Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-far-worker"} 2025-09-11T10:00:38.135850194Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-far-worker"} 2025-09-11T10:00:38.135902725Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-3", "NHC": "nhc-far-worker"} 2025-09-11T10:00:38.1359469Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-3", "NHC": "nhc-far-worker"} 2025-09-11T10:00:38.136102848Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:00:38.147838448Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.85216557s"} 2025-09-11T10:00:38.147914808Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.852085945s"} 2025-09-11T10:00:38.265022572Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:00:38Z" level=info msg="invalidating lease" time="2025-09-11T10:00:38Z" level=info msg="getting lease" 2025-09-11T10:00:38.365331717Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} time="2025-09-11T10:00:38Z" level=info msg="invalidating lease" time="2025-09-11T10:00:38Z" level=info msg="getting lease" 2025-09-11T10:00:38.465724735Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-11T09:58:16Z"}, "patch": "{\"status\":{\"healthyNodes\":2}}"} 2025-09-11T10:00:38.672079745Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "30.852085945s"} 2025-09-11T10:00:43.408911132Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-1", "NHC": "nhc-far-worker"} 2025-09-11T10:00:43.409021658Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-1", "NHC": "nhc-far-worker"} 2025-09-11T10:00:43.409101657Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:00:43.416762326Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown", "duration left": "24.583240332s"} 2025-09-11T10:00:43.416797746Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.583202691s"} 2025-09-11T10:00:43.416803587Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown", "duration left": "24.583196839s"} 2025-09-11T10:00:43.427190179Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:00:43Z" level=info msg="invalidating lease" time="2025-09-11T10:00:43Z" level=info msg="getting lease" 2025-09-11T10:00:43.527847942Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":1,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-11T10:00:38Z"}, "patch": "{\"status\":{\"healthyNodes\":1}}"} 2025-09-11T10:00:43.739585994Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "25.583196839s"} 2025-09-11T10:01:09.323970477Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:01:09.328588803Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:01:09.328682791Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:01:09.328708528Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown", "duration left": "3.671292144s"} 2025-09-11T10:01:09.328760044Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-2", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"240832"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:01:09.328802095Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-3", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"240832"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:01:09.333421735Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:01:09Z" level=info msg="invalidating lease" time="2025-09-11T10:01:09Z" level=info msg="getting lease" 2025-09-11T10:01:09.335188874Z INFO controllers.NodeHealthCheck Skipped remediation because the number of healthy nodes selected by the selector is 1 and should equal or exceed 3 {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:01:09.3353228Z DEBUG events [remediation] Skipped remediation because the number of healthy nodes selected by the selector is 1 and should equal or exceed 3 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"240832"}, "reason": "RemediationSkipped"} 2025-09-11T10:01:09.335441496Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-0-2"},{"name":"worker-0-3"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-11T10:00:43Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\"},{\"name\":\"worker-0-3\"}]}}"} 2025-09-11T10:01:09.541513588Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "4.671292144s"} 2025-09-11T10:01:14.213022312Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:01:14.217727947Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:01:14.217821573Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:01:14.217842814Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:01:14.217900633Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"241137"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:01:14.217924413Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-2", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"241137"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:01:14.217928378Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-3", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"241137"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:01:14.222228734Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:01:14Z" level=info msg="invalidating lease" time="2025-09-11T10:01:14Z" level=info msg="getting lease" 2025-09-11T10:01:14.223809646Z INFO controllers.NodeHealthCheck Skipped remediation because the number of healthy nodes selected by the selector is 1 and should equal or exceed 3 {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:01:14.223913463Z DEBUG events [remediation] Skipped remediation because the number of healthy nodes selected by the selector is 1 and should equal or exceed 3 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"241137"}, "reason": "RemediationSkipped"} 2025-09-11T10:01:14.223989741Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-0-2"},{"name":"worker-0-3"},{"name":"worker-0-1"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-11T10:01:09Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\"},{\"name\":\"worker-0-3\"},{\"name\":\"worker-0-1\"}]}}"} 2025-09-11T10:01:14.429825675Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-11T10:13:03.852844755Z INFO nodehealthcheck-resource validate update {"name": "nhc-far-worker"} 2025-09-11T10:13:03.856222399Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:13:03.860843266Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:13:03.860881676Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:13:03.860890035Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:13:03.861028135Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"244958"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:13:03.861083013Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-2", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"244958"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:13:03.86108745Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-3", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"244958"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:13:03.865647753Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:13:03Z" level=info msg="invalidating lease" time="2025-09-11T10:13:03Z" level=info msg="getting lease" 2025-09-11T10:13:03.966757012Z INFO controllers.NodeHealthCheck Skipped remediation because the number of healthy nodes selected by the selector is 1 and should equal or exceed 2 {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:13:03.966869708Z DEBUG events [remediation] Skipped remediation because the number of healthy nodes selected by the selector is 1 and should equal or exceed 2 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"244958"}, "reason": "RemediationSkipped"} 2025-09-11T10:13:03.966955041Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-11T10:14:02.850173149Z INFO nodehealthcheck-resource validate update {"name": "nhc-far-worker"} 2025-09-11T10:14:02.853850102Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:02.859064778Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:02.859103089Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:02.859111249Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:02.859154193Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245225"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:14:02.859201091Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-2", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245225"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:14:02.859206536Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-3", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245225"}, "reason": "DetectedUnhealthy"} 2025-09-11T10:14:02.863942521Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:02Z" level=info msg="invalidating lease" time="2025-09-11T10:14:02Z" level=info msg="getting lease" 2025-09-11T10:14:02.866199172Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:02.965024452Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-1"} time="2025-09-11T10:14:02Z" level=info msg="request lease" time="2025-09-11T10:14:02Z" level=info msg="getting lease" time="2025-09-11T10:14:02Z" level=info msg="create lease" 2025-09-11T10:14:02.969300996Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} 2025-09-11T10:14:02.978948988Z DEBUG events [remediation] Created remediation object for node worker-0-1 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245225"}, "reason": "RemediationCreated"} 2025-09-11T10:14:02.981621423Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:02.989336895Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:02.989393232Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:02.989403977Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:02.989408657Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:02.989750748Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-2"} time="2025-09-11T10:14:02Z" level=info msg="request lease" time="2025-09-11T10:14:02Z" level=info msg="getting lease" time="2025-09-11T10:14:02Z" level=info msg="create lease" 2025-09-11T10:14:02.993173713Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} 2025-09-11T10:14:02.995239885Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:02.995270681Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:02.99527943Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:02.99528387Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:02.998548443Z DEBUG events [remediation] Created remediation object for node worker-0-2 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245225"}, "reason": "RemediationCreated"} 2025-09-11T10:14:03.06576959Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:03.070695673Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-3"} time="2025-09-11T10:14:03Z" level=info msg="request lease" time="2025-09-11T10:14:03Z" level=info msg="getting lease" time="2025-09-11T10:14:03Z" level=info msg="create lease" 2025-09-11T10:14:03.073587743Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} 2025-09-11T10:14:03.079048643Z DEBUG events [remediation] Created remediation object for node worker-0-3 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245225"}, "reason": "RemediationCreated"} 2025-09-11T10:14:03.081950748Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}]},{"name":"worker-0-3","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:03Z","templateName":"fenceagentsremediationtemplate-test"}]},{"name":"worker-0-1","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 3 nodes","lastUpdateTime":"2025-09-11T10:01:14Z"}, "patch": "{\"status\":{\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 3 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-vtfhp\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b28ddf12-0382-439d-929c-4732aac8593a\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]},{\"name\":\"worker-0-3\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-3-vks87\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"878bdfc2-52cf-4803-8b2a-460c61ac6c85\"},\"started\":\"2025-09-11T10:14:03Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]},{\"name\":\"worker-0-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-1-6lcbd\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"128cdad0-679f-424b-b4c6-8da3d65e6ef4\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T10:14:03.203234858Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.203266055Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.203275455Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.203279833Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.208704809Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.208749461Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.208761625Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.208767584Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.287773437Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:03.287852794Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:03.293054551Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.293079513Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.293084647Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.29824204Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:02Z"} 2025-09-11T10:14:03.298273018Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:03.298283371Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:03Z" level=info msg="invalidating lease" time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.300769882Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:03.305400929Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.305456574Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.307900767Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:03.312240818Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.312291467Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.314884685Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:03.319847043Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.319905795Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.322308922Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:03.416766519Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.416799464Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.416810859Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.416814893Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.416853402Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:03.421608775Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.421642943Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.421653976Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:03.421659801Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:03.422007519Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.422028134Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.422032611Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.4274017Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:02Z"} 2025-09-11T10:14:03.427440119Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:03.427454846Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:03.427470716Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:03Z" level=info msg="invalidating lease" time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.528216079Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:03.628268134Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.62834426Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.631024352Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:03.635438585Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.635493912Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.637955775Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:03.642219862Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.642278787Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.645276988Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:03.645341879Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:03.649328258Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.649361582Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.649366958Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:03.654093128Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:02Z"} 2025-09-11T10:14:03.654162853Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:03.654183676Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:03.654199991Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:03Z" level=info msg="invalidating lease" time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.656818154Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:03.660885103Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.660943928Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.663166753Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:03.668627167Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.668679339Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.670898129Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:03.675114098Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:03Z" level=info msg="getting lease" 2025-09-11T10:14:03.675171515Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:03.677519579Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:08.68123477Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:08.681295721Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:08.681313595Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:08.681320629Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:08.681354735Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:08.685542449Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:08.685686877Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:08.685726212Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:08.68574921Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:08.68683111Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:08.686860446Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:08.686865928Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:08.687962443Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:08.688031615Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:08.688065291Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:08.688080876Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:08.692819685Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:02Z"} 2025-09-11T10:14:08.692934158Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:08.692953764Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:08.692969432Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:08Z" level=info msg="invalidating lease" time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.695634426Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:08.700060828Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.700127612Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:08.702419543Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:08.706626438Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.706703339Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:08.709183016Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:08.713477949Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.71353102Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:08.716145213Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:08.716220105Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:08.720146363Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:08.720171481Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:08.720177894Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:08.725074982Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:02Z"} 2025-09-11T10:14:08.725118785Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:08.725139144Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:08.725147539Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:08Z" level=info msg="invalidating lease" time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.728167966Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:08.733637102Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.733724976Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:08.736438341Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:08.740928659Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.740996Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:08.743470955Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:08.747975676Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:08Z" level=info msg="getting lease" 2025-09-11T10:14:08.748065709Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:08.75053205Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:11.196004023Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:11.196064701Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:11.196083871Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:11.196089176Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:11.196139123Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:11.200624497Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:11.200656061Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:11.200664519Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:11.20564101Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:14:11.205672551Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:11.205681279Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:11.205686937Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:11Z" level=info msg="invalidating lease" time="2025-09-11T10:14:11Z" level=info msg="getting lease" 2025-09-11T10:14:11.208462229Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:11.212834Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:11Z" level=info msg="getting lease" 2025-09-11T10:14:11.212901015Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:11.215029094Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:11.219187185Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:11Z" level=info msg="getting lease" 2025-09-11T10:14:11.219248092Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:11.221546257Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:11.225984234Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:11Z" level=info msg="getting lease" 2025-09-11T10:14:11.226060982Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:11.229347237Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:15.194837797Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:15.194895274Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:15.194912336Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:15.194916794Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:15.194949322Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:15.200498351Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:15.200542729Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:15.200548741Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:15.20599423Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:14:15.206051186Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:03Z"} 2025-09-11T10:14:15.206065245Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:14:15.206073032Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:15Z" level=info msg="invalidating lease" time="2025-09-11T10:14:15Z" level=info msg="getting lease" 2025-09-11T10:14:15.209313443Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:15.214005541Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:15Z" level=info msg="getting lease" 2025-09-11T10:14:15.214107467Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:15.216344792Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:15.22023137Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:15Z" level=info msg="getting lease" 2025-09-11T10:14:15.220300309Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:15.222774443Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:15.227087124Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:15Z" level=info msg="getting lease" 2025-09-11T10:14:15.22724385Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:15.229686886Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:14:19.144565604Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:19.144654139Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:19.144927528Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:14:19.144951308Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:14:19.14499406Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:14:19.150037297Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:19.150123914Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:19.150159832Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:14:19.15552527Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:14:19.155596535Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:19Z"} 2025-09-11T10:14:19.155616109Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:14:19.155631681Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:14:19Z" level=info msg="invalidating lease" time="2025-09-11T10:14:19Z" level=info msg="getting lease" 2025-09-11T10:14:19.159197243Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:14:19.16434913Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:19Z" level=info msg="getting lease" 2025-09-11T10:14:19.164475611Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:19.256917488Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:14:19.261960312Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:19Z" level=info msg="getting lease" 2025-09-11T10:14:19.262113215Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:19.264669753Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:14:19.269628561Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-3-vks87", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:14:19Z" level=info msg="getting lease" 2025-09-11T10:14:19.269689877Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-3", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:14:19.272249722Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2025-09-11T10:15:21.467551211Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-3", "NHC": "nhc-far-worker"} 2025-09-11T10:15:21.467643156Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-3", "NHC": "nhc-far-worker"} 2025-09-11T10:15:21.467717907Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:21.474841016Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:15:21.474875055Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:15:21.474882208Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "False", "duration left": "29.525118625s"} 2025-09-11T10:15:21.48137704Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:21.481418441Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:19Z"} 2025-09-11T10:15:21.48142975Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:15:21.481434849Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:21Z" level=info msg="invalidating lease" time="2025-09-11T10:15:21Z" level=info msg="getting lease" 2025-09-11T10:15:21.484410076Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:15:21.484425587Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:21.582160454Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:15:21Z" level=info msg="getting lease" 2025-09-11T10:15:21.582344705Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} time="2025-09-11T10:15:21Z" level=info msg="request lease" time="2025-09-11T10:15:21Z" level=info msg="getting lease" time="2025-09-11T10:15:21Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-far-worker setAcquireTime=false" 2025-09-11T10:15:21.681918965Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:15:21.703987706Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-far-worker"} 2025-09-11T10:15:21.70409627Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-far-worker"} 2025-09-11T10:15:21.782997615Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-2-vtfhp", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:15:21Z" level=info msg="getting lease" 2025-09-11T10:15:21.783112285Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "10m0s"} time="2025-09-11T10:15:21Z" level=info msg="request lease" time="2025-09-11T10:15:21Z" level=info msg="getting lease" time="2025-09-11T10:15:21Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-far-worker setAcquireTime=false" 2025-09-11T10:15:21.883641932Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "30.525118625s"} 2025-09-11T10:15:21.883744347Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:21.889029767Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "Unknown"} 2025-09-11T10:15:21.889082448Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "False", "duration left": "29.110918654s"} 2025-09-11T10:15:21.889093174Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "False", "duration left": "29.110907226s"} 2025-09-11T10:15:21.894171423Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:21.894213552Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:19Z"} 2025-09-11T10:15:21.894227143Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:15:21.89423409Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:21Z" level=info msg="invalidating lease" time="2025-09-11T10:15:21Z" level=info msg="getting lease" 2025-09-11T10:15:21.897176296Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:15:21.897189087Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:15:21.897193815Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:21.901901792Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-1-6lcbd", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-09-11T10:15:21Z" level=info msg="getting lease" 2025-09-11T10:15:21.901977707Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-far-worker", "lease name": "node-worker-0-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2025-09-11T10:15:21.905416737Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "30.110907226s"} 2025-09-11T10:15:21.971178628Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-1", "NHC": "nhc-far-worker"} 2025-09-11T10:15:21.971272717Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-1", "NHC": "nhc-far-worker"} 2025-09-11T10:15:21.971332952Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:21.977002846Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "False", "duration left": "29.023000373s"} 2025-09-11T10:15:21.977077445Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "False", "duration left": "29.022923509s"} 2025-09-11T10:15:21.977086967Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "False", "duration left": "29.022913854s"} 2025-09-11T10:15:21.996174045Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:21.996220644Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:19Z"} 2025-09-11T10:15:21.996229302Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:15:21.996234789Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:21Z" level=info msg="invalidating lease" time="2025-09-11T10:15:21Z" level=info msg="getting lease" 2025-09-11T10:15:21.99945284Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:21.999468121Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:15:21.999470774Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:15:21.999673669Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "30.022913854s"} 2025-09-11T10:15:22.230619532Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-far-worker"} 2025-09-11T10:15:22.230679385Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-far-worker"} 2025-09-11T10:15:22.230727129Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:22.242273482Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "False", "duration left": "28.757728806s"} 2025-09-11T10:15:22.242320074Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "False", "duration left": "28.757680624s"} 2025-09-11T10:15:22.248761115Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:22.248804967Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:19Z"} 2025-09-11T10:15:22.248814619Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:15:22.248820298Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:22Z" level=info msg="invalidating lease" time="2025-09-11T10:15:22Z" level=info msg="getting lease" 2025-09-11T10:15:22.348536907Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:15:22.457967363Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-2-vtfhp"} 2025-09-11T10:15:22.457991606Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:22.457995129Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:15:22.458062143Z DEBUG events [remediation] Deleted remediation CR of kind FenceAgentsRemediation with name worker-0-2-vtfhp {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"245243"}, "reason": "RemediationRemoved"} 2025-09-11T10:15:22.458261872Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-vtfhp","uid":"b28ddf12-0382-439d-929c-4732aac8593a","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}],"conditionsHealthyTimestamp":"2025-09-11T10:15:22Z"},{"name":"worker-0-3","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:03Z","templateName":"fenceagentsremediationtemplate-test"}]},{"name":"worker-0-1","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 3 nodes","lastUpdateTime":"2025-09-11T10:14:03Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2025-09-11T10:15:22Z\",\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-vtfhp\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b28ddf12-0382-439d-929c-4732aac8593a\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]},{\"name\":\"worker-0-3\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-3-vks87\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"878bdfc2-52cf-4803-8b2a-460c61ac6c85\"},\"started\":\"2025-09-11T10:14:03Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]},{\"name\":\"worker-0-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-1-6lcbd\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"128cdad0-679f-424b-b4c6-8da3d65e6ef4\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T10:15:22.45855428Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:22.458620688Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:22.458634807Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:22.458655924Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:22.481102577Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:22.481155024Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-2-vtfhp", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:22.665086259Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-11T10:15:22.665188127Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:22.669968458Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "False", "duration left": "28.330032719s"} 2025-09-11T10:15:22.670001129Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-3", "condition type": "Ready", "condition status": "False", "duration left": "28.329999339s"} 2025-09-11T10:15:22.676533318Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:22.676566718Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:15:22.676573695Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:22Z" level=info msg="invalidating lease" time="2025-09-11T10:15:22Z" level=info msg="getting lease" 2025-09-11T10:15:22.679847664Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} time="2025-09-11T10:15:22Z" level=info msg="invalidating lease" time="2025-09-11T10:15:22Z" level=info msg="getting lease" 2025-09-11T10:15:22.686279312Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:22.686300635Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:15:22.68650959Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-3","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:03Z","templateName":"fenceagentsremediationtemplate-test"}]},{"name":"worker-0-1","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 2 nodes","lastUpdateTime":"2025-09-11T10:15:22Z"}, "patch": "{\"status\":{\"healthyNodes\":2,\"reason\":\"NHC is remediating 2 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-3\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-3-vks87\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"878bdfc2-52cf-4803-8b2a-460c61ac6c85\"},\"started\":\"2025-09-11T10:14:03Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]},{\"name\":\"worker-0-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-1-6lcbd\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"128cdad0-679f-424b-b4c6-8da3d65e6ef4\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T10:15:22.89271497Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "29.329999339s"} 2025-09-11T10:15:24.095387314Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-3", "NHC": "nhc-far-worker"} 2025-09-11T10:15:24.095453875Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-3", "NHC": "nhc-far-worker"} 2025-09-11T10:15:24.095501965Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:24.104445498Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "False", "duration left": "26.895556257s"} 2025-09-11T10:15:24.11052098Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:24.110555047Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:15Z"} 2025-09-11T10:15:24.110561355Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:24Z" level=info msg="invalidating lease" time="2025-09-11T10:15:24Z" level=info msg="getting lease" 2025-09-11T10:15:24.2112088Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} time="2025-09-11T10:15:24Z" level=info msg="invalidating lease" time="2025-09-11T10:15:24Z" level=info msg="getting lease" 2025-09-11T10:15:24.213675793Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} 2025-09-11T10:15:24.223800501Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-3-vks87"} 2025-09-11T10:15:24.223831405Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:24.223879777Z DEBUG events [remediation] Deleted remediation CR of kind FenceAgentsRemediation with name worker-0-3-vks87 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"246782"}, "reason": "RemediationRemoved"} 2025-09-11T10:15:24.224137701Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-3","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-3-vks87","uid":"878bdfc2-52cf-4803-8b2a-460c61ac6c85","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:03Z","templateName":"fenceagentsremediationtemplate-test"}],"conditionsHealthyTimestamp":"2025-09-11T10:15:24Z"},{"name":"worker-0-1","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 2 nodes","lastUpdateTime":"2025-09-11T10:15:22Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2025-09-11T10:15:24Z\",\"name\":\"worker-0-3\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-3-vks87\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"878bdfc2-52cf-4803-8b2a-460c61ac6c85\"},\"started\":\"2025-09-11T10:14:03Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]},{\"name\":\"worker-0-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-1-6lcbd\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"128cdad0-679f-424b-b4c6-8da3d65e6ef4\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T10:15:24.224165492Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:24.224186723Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:24.224196775Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:24.22420675Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:24.242291327Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:24.242352305Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-3-vks87", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:24.432138987Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-11T10:15:24.432240616Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:24.43642255Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-1", "condition type": "Ready", "condition status": "False", "duration left": "26.563579988s"} 2025-09-11T10:15:24.440759295Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:24.440782469Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} time="2025-09-11T10:15:24Z" level=info msg="invalidating lease" time="2025-09-11T10:15:24Z" level=info msg="getting lease" 2025-09-11T10:15:24.445907519Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:24Z" level=info msg="invalidating lease" time="2025-09-11T10:15:24Z" level=info msg="getting lease" 2025-09-11T10:15:24.541883241Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} time="2025-09-11T10:15:24Z" level=info msg="invalidating lease" time="2025-09-11T10:15:24Z" level=info msg="getting lease" 2025-09-11T10:15:24.544253673Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:24.544436875Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-1","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-11T10:15:24Z"}, "patch": "{\"status\":{\"healthyNodes\":3,\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-1-6lcbd\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"128cdad0-679f-424b-b4c6-8da3d65e6ef4\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T10:15:24.749724247Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "27.563579988s"} 2025-09-11T10:15:25.002016679Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-1", "NHC": "nhc-far-worker"} 2025-09-11T10:15:25.002114075Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-1", "NHC": "nhc-far-worker"} 2025-09-11T10:15:25.002189684Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:25.019920708Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-far-worker", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2025-09-11T10:14:11Z"} 2025-09-11T10:15:25.019971699Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.022796512Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} 2025-09-11T10:15:25.034937233Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-1-6lcbd"} 2025-09-11T10:15:25.034960064Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} 2025-09-11T10:15:25.035030992Z DEBUG events [remediation] Deleted remediation CR of kind FenceAgentsRemediation with name worker-0-1-6lcbd {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"246946"}, "reason": "RemediationRemoved"} 2025-09-11T10:15:25.035606427Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:25.03565992Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:25.035673289Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:25.035677529Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.03719875Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.055568407Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-far-worker","uid":"8fd17847-eb17-4e87-aba6-8770ba64a27b","controller":false}} 2025-09-11T10:15:25.055612535Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-far-worker", "Remediation CR Name": "worker-0-1-6lcbd", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-09-11T10:15:25.055672373Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":3,"unhealthyNodes":[{"name":"worker-0-1","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-1-6lcbd","uid":"128cdad0-679f-424b-b4c6-8da3d65e6ef4","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-09-11T10:14:02Z","templateName":"fenceagentsremediationtemplate-test"}],"conditionsHealthyTimestamp":"2025-09-11T10:15:25Z"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-09-11T10:15:24Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2025-09-11T10:15:25Z\",\"name\":\"worker-0-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-1-6lcbd\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"128cdad0-679f-424b-b4c6-8da3d65e6ef4\"},\"started\":\"2025-09-11T10:14:02Z\",\"templateName\":\"fenceagentsremediationtemplate-test\"}]}]}}"} 2025-09-11T10:15:25.262123683Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "11s"} 2025-09-11T10:15:25.262254372Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:25.273680056Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.276111131Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.379254871Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.474520333Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} time="2025-09-11T10:15:25Z" level=info msg="invalidating lease" time="2025-09-11T10:15:25Z" level=info msg="getting lease" 2025-09-11T10:15:25.575653814Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-far-worker", "new status": {"observedNodes":4,"healthyNodes":4,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-09-11T09:58:15Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-09-11T10:15:25Z"}, "patch": "{\"status\":{\"healthyNodes\":4,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"} 2025-09-11T10:15:25.781956144Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-09-11T10:15:33.665742592Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-far-worker"} 2025-09-11T10:15:33.674462426Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-0"} time="2025-09-11T10:15:33Z" level=info msg="invalidating lease" time="2025-09-11T10:15:33Z" level=info msg="getting lease" 2025-09-11T10:15:33.675981593Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-1"} time="2025-09-11T10:15:33Z" level=info msg="invalidating lease" time="2025-09-11T10:15:33Z" level=info msg="getting lease" 2025-09-11T10:15:33.677608211Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-2"} time="2025-09-11T10:15:33Z" level=info msg="invalidating lease" time="2025-09-11T10:15:33Z" level=info msg="getting lease" 2025-09-11T10:15:33.679113083Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-far-worker", "node": "worker-0-3"} time="2025-09-11T10:15:33Z" level=info msg="invalidating lease" time="2025-09-11T10:15:33Z" level=info msg="getting lease" 2025-09-11T10:15:33.680650518Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-far-worker", "error": null, "requeue": false, "requeuAfter": "0s"}