[kni@cert-rhosp-02 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.16.0-0.nightly-2025-09-06-014223 True False 37h Error while reconciling 4.16.0-0.nightly-2025-09-06-014223: the cluster operator machine-config is degraded [kni@cert-rhosp-02 ~]$ oc get csv No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get pods No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 37h v1.29.14+c68a663 worker-0-1 NotReady worker 37h v1.29.14+c68a663 worker-0-2 NotReady worker 37h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded [kni@cert-rhosp-02 ~]$ oc get pods NAME READY STATUS RESTARTS AGE fence-agents-remediation-controller-manager-5f76bb6467-8m5mv 2/2 Running 0 29s fence-agents-remediation-controller-manager-5f76bb6467-gcxjs 2/2 Running 0 29s [kni@cert-rhosp-02 ~]$ oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION worker-0-0 Ready worker 37h v1.29.14+c68a663 worker-0-1 NotReady worker 37h v1.29.14+c68a663 worker-0-2 NotReady worker 37h v1.29.14+c68a663 [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name | grep fence-agents-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ echo $PODS pod/fence-agents-remediation-controller-manager-5f76bb6467-8m5mv pod/fence-agents-remediation-controller-manager-5f76bb6467-gcxjs [kni@cert-rhosp-02 ~]$ for p in $PODS; do > echo "== $p" > oc get "$p" -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-5f76bb6467-8m5mv "worker-0-0" == pod/fence-agents-remediation-controller-manager-5f76bb6467-gcxjs "worker-0-0" [kni@cert-rhosp-02 ~]$ [kni@cert-rhosp-02 ~]$ oc get csv fence-agents-remediation.v0.6.0 NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded [kni@cert-rhosp-02 ~]$ [kni@cert-rhosp-02 ~]$ oc get csv fence-agents-remediation.v0.6.0 -o yaml apiVersion: operators.coreos.com/v1alpha1 kind: ClusterServiceVersion metadata: annotations: alm-examples: |- [ { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediation", "metadata": { "name": "worker-1" }, "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } }, { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediationTemplate", "metadata": { "name": "fenceagentsremediationtemplate-default" }, "spec": { "template": { "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } } } } ] capabilities: Basic Install categories: OpenShift Optional containerImage: "" createdAt: "2025-09-08 09:19:00" description: Fence Agents Remediation Operator uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent, and then evicting workloads from the unhealthy node. features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" features.operators.openshift.io/csi: "false" features.operators.openshift.io/disconnected: "true" features.operators.openshift.io/fips-compliant: "true" features.operators.openshift.io/proxy-aware: "false" features.operators.openshift.io/tls-profiles: "false" features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" olm.operatorGroup: openshift-workload-availability-4p9hd olm.operatorNamespace: openshift-workload-availability olm.skipRange: '>=0.5.0 <0.6.0' olm.targetNamespaces: "" operatorframework.io/properties: '{"properties":[{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediation","version":"v1alpha1"}},{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediationTemplate","version":"v1alpha1"}},{"type":"olm.package","value":{"packageName":"fence-agents-remediation","version":"0.6.0"}}]}' operatorframework.io/suggested-namespace: openshift-workload-availability operatorframework.io/suggested-namespace-template: '{"kind":"Namespace","apiVersion":"v1","metadata":{"name":"openshift-workload-availability","annotations":{"openshift.io/node-selector":""}}}' operators.openshift.io/valid-subscription: '["OpenShift Kubernetes Engine", "OpenShift Container Platform", "OpenShift Platform Plus"]' operators.operatorframework.io/builder: operator-sdk-v1.32.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/medik8s/fence-agents-remediation support: Red Hat creationTimestamp: "2025-09-09T10:16:01Z" finalizers: - operators.coreos.com/csv-cleanup generation: 1 labels: olm.managed: "true" operators.coreos.com/fence-agents-remediation.openshift-workload-availability: "" name: fence-agents-remediation.v0.6.0 namespace: openshift-workload-availability resourceVersion: "931228" uid: c2edd0c8-158a-4c97-b372-035b585683fb spec: apiservicedefinitions: {} cleanup: enabled: false customresourcedefinitions: owned: - description: FenceAgentsRemediation is the Schema for the fenceagentsremediations API displayName: Fence Agents Remediation kind: FenceAgentsRemediation name: fenceagentsremediations.fence-agents-remediation.medik8s.io resources: - kind: FenceAgentsRemediation name: fenceagentsremediations version: v1alpha1 specDescriptors: - description: Agent is the name of fence agent that will be used. It should have a fence_ prefix. displayName: Agent path: agent - description: NodeSecretNames maps the node name to the Secret name which contains params relevant for that node. displayName: Node Secret Names path: nodeSecretNames - description: NodeParameters are passed to the fencing agent according to the node that is fenced, since they are node specific displayName: Node Parameters path: nodeparameters - description: RemediationStrategy is the remediation method for unhealthy nodes. Currently, it could be either "OutOfServiceTaint" or "ResourceDeletion". ResourceDeletion will iterate over all pods related to the unhealthy node and delete them. OutOfServiceTaint will add the out-of-service taint which is a new well-known taint "node.kubernetes.io/out-of-service" that enables automatic deletion of pv-attached pods on failed nodes, "out-of-service" taint is only supported on clusters with k8s version 1.26+ or OCP/OKD version 4.13+. displayName: Remediation Strategy path: remediationStrategy - description: RetryCount is the number of times the fencing agent will be executed displayName: Retry Count path: retrycount - description: RetryInterval is the interval between each fencing agent execution displayName: Retry Interval path: retryinterval - description: SharedSecretName is the name of the Secret which will contain params needed for FAR in order to remediate any node. Using this Secret is optional. displayName: Shared Secret Name path: sharedSecretName - description: SharedParameters are parameters common to all nodes displayName: Shared Parameters path: sharedparameters - description: Timeout is the timeout for each fencing agent execution displayName: Timeout path: timeout statusDescriptors: - description: 'Represents the observations of a FenceAgentsRemediation''s current state. Known .status.conditions.type are: "Processing", "FenceAgentActionSucceeded", and "Succeeded".' displayName: conditions path: conditions x-descriptors: - urn:alm:descriptor:io.kubernetes.conditions - description: LastUpdateTime is the last time the status was updated. displayName: Last Update Time path: lastUpdateTime version: v1alpha1 - description: FenceAgentsRemediationTemplate is the Schema for the fenceagentsremediationtemplates API displayName: Fence Agents Remediation Template kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io resources: - kind: FenceAgentsRemediationTemplate name: fenceagentsremediationtemplates version: v1alpha1 specDescriptors: - description: Template defines the desired state of FenceAgentsRemediationTemplate displayName: Template path: template - description: Agent is the name of fence agent that will be used. It should have a fence_ prefix. displayName: Agent path: template.spec.agent - description: NodeSecretNames maps the node name to the Secret name which contains params relevant for that node. displayName: Node Secret Names path: template.spec.nodeSecretNames - description: NodeParameters are passed to the fencing agent according to the node that is fenced, since they are node specific displayName: Node Parameters path: template.spec.nodeparameters - description: RemediationStrategy is the remediation method for unhealthy nodes. Currently, it could be either "OutOfServiceTaint" or "ResourceDeletion". ResourceDeletion will iterate over all pods related to the unhealthy node and delete them. OutOfServiceTaint will add the out-of-service taint which is a new well-known taint "node.kubernetes.io/out-of-service" that enables automatic deletion of pv-attached pods on failed nodes, "out-of-service" taint is only supported on clusters with k8s version 1.26+ or OCP/OKD version 4.13+. displayName: Remediation Strategy path: template.spec.remediationStrategy - description: RetryCount is the number of times the fencing agent will be executed displayName: Retry Count path: template.spec.retrycount - description: RetryInterval is the interval between each fencing agent execution displayName: Retry Interval path: template.spec.retryinterval - description: SharedSecretName is the name of the Secret which will contain params needed for FAR in order to remediate any node. Using this Secret is optional. displayName: Shared Secret Name path: template.spec.sharedSecretName - description: SharedParameters are parameters common to all nodes displayName: Shared Parameters path: template.spec.sharedparameters - description: Timeout is the timeout for each fencing agent execution displayName: Timeout path: template.spec.timeout version: v1alpha1 description: | ### Introduction Fence Agents Remediation (FAR) is a Kubernetes operator that uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent and then evicting workloads from the unhealthy node. ### Compatibility FAR is one of the remediator operators by [Medik8s](https://www.medik8s.io/remediation/remediation/), such as [Self Node Remediation](https://github.com/medik8s/self-node-remediation) and [Machine Deletion Remediation](https://github.com/medik8s/machine-deletion-remediation), that were designed to run with the Node HealthCheck Operator [(NHC)](https://github.com/medik8s/node-healthcheck-operator) which detects an unhealthy node and creates remediation CR. It is recommended to use FAR with NHC for an easier and smoother experience by fully automating the remediation process, but it can be used as a standalone remediator for the more experienced user. ### Advantages - Robustness - FAR has direct feedback from the traditional Application Programming Interface (API) call (e.g., IPMI) about the result of the fence action without using the Kubernetes API. - Speed - FAR is rapid since it can reboot a node and receive an acknowledgment from the API call while other remediators might need to wait a safe time till they can expect the node to be rebooted. - Diversity - FAR includes several fence agents from a large known set of upstream fencing agents for bare metal servers, virtual machines, cloud platforms, etc. - Adjustability - FAR allows to set up different parameters for running the API call that remediates the node. displayName: Fence Agents Remediation Operator icon: - base64data: iVBORw0KGgoAAAANSUhEUgAACicAAAgMCAMAAAAHVkIRAAAABGdBTUEAALGPC/xhBQAAAAFzUkdCAK7OHOkAAAJAUExURUdwTPf6/0Z756O89JE3crlBZPv///... .... .... .... 194wAAAAAAAAAAC8Baf84nL9t+JjAAAAAElFTkSuQmCC mediatype: image/png install: spec: clusterPermissions: - rules: - apiGroups: - "" resources: - namespaces verbs: - get - list - watch - apiGroups: - "" resources: - nodes verbs: - delete - get - list - update - watch - apiGroups: - "" resources: - pods verbs: - delete - deletecollection - get - list - update - watch - apiGroups: - "" resources: - pods/exec verbs: - create - apiGroups: - fence-agents-remediation.medik8s.io resources: - fenceagentsremediations verbs: - create - delete - get - list - patch - update - watch - apiGroups: - fence-agents-remediation.medik8s.io resources: - fenceagentsremediations/finalizers verbs: - update - apiGroups: - fence-agents-remediation.medik8s.io resources: - fenceagentsremediations/status verbs: - get - patch - update - apiGroups: - storage.k8s.io resources: - volumeattachments verbs: - delete - get - list - watch - apiGroups: - authentication.k8s.io resources: - tokenreviews verbs: - create - apiGroups: - authorization.k8s.io resources: - subjectaccessreviews verbs: - create serviceAccountName: fence-agents-remediation-controller-manager deployments: - label: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager name: fence-agents-remediation-controller-manager spec: replicas: 2 selector: matchLabels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager strategy: {} template: metadata: annotations: kubectl.kubernetes.io/default-container: manager creationTimestamp: null labels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: matchExpressions: - key: control-plane operator: In values: - controller-manager - key: app.kubernetes.io/name operator: In values: - fence-agents-remediation-operator topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - --secure-listen-address=0.0.0.0:8443 - --http2-disable - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 name: kube-rbac-proxy ports: - containerPort: 8443 name: https protocol: TCP resources: limits: cpu: 500m memory: 128Mi requests: cpu: 5m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL - args: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect command: - /manager env: - name: DEPLOYMENT_NAMESPACE valueFrom: fieldRef: fieldPath: metadata.namespace image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 livenessProbe: httpGet: path: /healthz port: 8081 initialDelaySeconds: 15 periodSeconds: 20 name: manager readinessProbe: httpGet: path: /readyz port: 8081 initialDelaySeconds: 5 periodSeconds: 10 resources: limits: cpu: 500m memory: 512Mi requests: cpu: 10m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL priorityClassName: system-cluster-critical securityContext: runAsNonRoot: true seccompProfile: type: RuntimeDefault serviceAccountName: fence-agents-remediation-controller-manager terminationGracePeriodSeconds: 10 permissions: - rules: - apiGroups: - "" resources: - configmaps verbs: - get - list - watch - create - update - patch - delete - apiGroups: - coordination.k8s.io resources: - leases verbs: - get - list - watch - create - update - patch - delete - apiGroups: - "" resources: - events verbs: - create - patch - apiGroups: - "" resources: - secrets verbs: - get - list - watch serviceAccountName: fence-agents-remediation-controller-manager strategy: deployment installModes: - supported: false type: OwnNamespace - supported: false type: SingleNamespace - supported: false type: MultiNamespace - supported: true type: AllNamespaces keywords: - medik8s - fencing - auto-healing - recovery - high-availability - fence-agents - fence-agents-remediation - remediation - far - baremetal links: - name: Fence Agents Remediation url: https://access.redhat.com/documentation/en-us/workload_availability_for_red_hat_openshift/25.8/html/remediation_fencing_and_maintenance/about-remediation-fencing-maintenance - name: Source Code url: https://www.github.com/medik8s/fence-agents-remediation maintainers: - email: team-dragonfly@redhat.com name: Dragonfly Team maturity: alpha minKubeVersion: 1.24.0 provider: name: Red Hat url: https://www.redhat.com relatedImages: - image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 name: manager - image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 name: kube-rbac-proxy replaces: fence-agents-remediation.v0.5.1 version: 0.6.0 webhookdefinitions: - admissionReviewVersions: - v1 containerPort: 443 deploymentName: fence-agents-remediation-controller-manager failurePolicy: Fail generateName: mfenceagentsremediationtemplate.kb.io rules: - apiGroups: - fence-agents-remediation.medik8s.io apiVersions: - v1alpha1 operations: - CREATE - UPDATE resources: - fenceagentsremediationtemplates sideEffects: None targetPort: 9443 type: MutatingAdmissionWebhook webhookPath: /mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate - admissionReviewVersions: - v1 containerPort: 443 deploymentName: fence-agents-remediation-controller-manager failurePolicy: Fail generateName: vfenceagentsremediation.kb.io rules: - apiGroups: - fence-agents-remediation.medik8s.io apiVersions: - v1alpha1 operations: - CREATE - UPDATE resources: - fenceagentsremediations sideEffects: None targetPort: 9443 type: ValidatingAdmissionWebhook webhookPath: /validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation - admissionReviewVersions: - v1 containerPort: 443 deploymentName: fence-agents-remediation-controller-manager failurePolicy: Fail generateName: vfenceagentsremediationtemplate.kb.io rules: - apiGroups: - fence-agents-remediation.medik8s.io apiVersions: - v1alpha1 operations: - CREATE - UPDATE resources: - fenceagentsremediationtemplates sideEffects: None targetPort: 9443 type: ValidatingAdmissionWebhook webhookPath: /validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate status: certsLastUpdated: "2025-09-09T10:16:04Z" certsRotateAt: "2027-09-08T10:16:04Z" cleanup: {} conditions: - lastTransitionTime: "2025-09-09T10:16:01Z" lastUpdateTime: "2025-09-09T10:16:01Z" message: requirements not yet checked phase: Pending reason: RequirementsUnknown - lastTransitionTime: "2025-09-09T10:16:01Z" lastUpdateTime: "2025-09-09T10:16:01Z" message: one or more requirements couldn't be found phase: Pending reason: RequirementsNotMet - lastTransitionTime: "2025-09-09T10:16:03Z" lastUpdateTime: "2025-09-09T10:16:03Z" message: all requirements found, attempting install phase: InstallReady reason: AllRequirementsMet - lastTransitionTime: "2025-09-09T10:16:04Z" lastUpdateTime: "2025-09-09T10:16:04Z" message: waiting for install components to report healthy phase: Installing reason: InstallSucceeded - lastTransitionTime: "2025-09-09T10:16:04Z" lastUpdateTime: "2025-09-09T10:16:05Z" message: 'installing: waiting for deployment fence-agents-remediation-controller-manager to become ready: deployment "fence-agents-remediation-controller-manager" not available: Deployment does not have minimum availability.' phase: Installing reason: InstallWaiting - lastTransitionTime: "2025-09-09T10:16:25Z" lastUpdateTime: "2025-09-09T10:16:25Z" message: install strategy completed with no errors phase: Succeeded reason: InstallSucceeded lastTransitionTime: "2025-09-09T10:16:25Z" lastUpdateTime: "2025-09-09T10:16:25Z" message: install strategy completed with no errors phase: Succeeded reason: InstallSucceeded requirementStatus: - group: operators.coreos.com kind: ClusterServiceVersion message: CSV minKubeVersion (1.24.0) less than server version (v1.29.14+c68a663) name: fence-agents-remediation.v0.6.0 status: Present version: v1alpha1 - group: apiextensions.k8s.io kind: CustomResourceDefinition message: CRD is present and Established condition is true name: fenceagentsremediations.fence-agents-remediation.medik8s.io status: Present uuid: f1ee6aa8-739c-4da0-9fb5-3939ddd8ca84 version: v1 - group: apiextensions.k8s.io kind: CustomResourceDefinition message: CRD is present and Established condition is true name: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io status: Present uuid: 8976b0f8-6f45-4cd2-a020-ea6b66a422e8 version: v1 - dependents: - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["get","list","watch"],"apiGroups":[""],"resources":["namespaces"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["delete","get","list","update","watch"],"apiGroups":[""],"resources":["nodes"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["delete","deletecollection","get","list","update","watch"],"apiGroups":[""],"resources":["pods"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create"],"apiGroups":[""],"resources":["pods/exec"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create","delete","get","list","patch","update","watch"],"apiGroups":["fence-agents-remediation.medik8s.io"],"resources":["fenceagentsremediations"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["update"],"apiGroups":["fence-agents-remediation.medik8s.io"],"resources":["fenceagentsremediations/finalizers"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["get","patch","update"],"apiGroups":["fence-agents-remediation.medik8s.io"],"resources":["fenceagentsremediations/status"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["delete","get","list","watch"],"apiGroups":["storage.k8s.io"],"resources":["volumeattachments"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create"],"apiGroups":["authentication.k8s.io"],"resources":["tokenreviews"]} status: Satisfied version: v1 - group: rbac.authorization.k8s.io kind: PolicyRule message: cluster rule:{"verbs":["create"],"apiGroups":["authorization.k8s.io"],"resources":["subjectaccessreviews"]} status: Satisfied version: v1 group: "" kind: ServiceAccount message: "" name: fence-agents-remediation-controller-manager status: Present version: v1 [kni@cert-rhosp-02 ~]$ for p in $PODS; do echo "== $p"; oc get "$p" -o json | jq .spec.nodeName; done == pod/fence-agents-remediation-controller-manager-5f76bb6467-8m5mv "worker-0-0" == pod/fence-agents-remediation-controller-manager-5f76bb6467-gcxjs "worker-0-0" [kni@cert-rhosp-02 ~]$ oc delete pod/fence-agents-remediation-controller-manager-5f76bb6467-gcxjs pod "fence-agents-remediation-controller-manager-5f76bb6467-gcxjs" deleted [kni@cert-rhosp-02 ~]$ PODS=$(oc get pods -o name | grep fence-agents-remediation-controller-manager) [kni@cert-rhosp-02 ~]$ echo $PODS pod/fence-agents-remediation-controller-manager-5f76bb6467-8m5mv pod/fence-agents-remediation-controller-manager-5f76bb6467-j27z4 [kni@cert-rhosp-02 ~]$ for p in $PODS; do echo "== $p"; oc get "$p" -o json | jq .spec.nodeName; done == pod/fence-agents-remediation-controller-manager-5f76bb6467-8m5mv "worker-0-0" == pod/fence-agents-remediation-controller-manager-5f76bb6467-j27z4 "worker-0-2" [kni@cert-rhosp-02 ~]$ oc get Deployment -o yaml apiVersion: v1 items: - apiVersion: apps/v1 kind: Deployment metadata: annotations: deployment.kubernetes.io/revision: "1" creationTimestamp: "2025-09-09T10:16:04Z" generation: 2 labels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager olm.deployment-spec-hash: 6nTT1BF0O7TlTgcmFDQZIm0VeZk3RHTZSoNFiW olm.managed: "true" olm.owner: fence-agents-remediation.v0.6.0 olm.owner.kind: ClusterServiceVersion olm.owner.namespace: openshift-workload-availability operators.coreos.com/fence-agents-remediation.openshift-workload-availability: "" name: fence-agents-remediation-controller-manager namespace: openshift-workload-availability ownerReferences: - apiVersion: operators.coreos.com/v1alpha1 blockOwnerDeletion: false controller: false kind: ClusterServiceVersion name: fence-agents-remediation.v0.6.0 uid: c2edd0c8-158a-4c97-b372-035b585683fb resourceVersion: "940177" uid: 9b0189ff-0eb9-498c-b9b2-ab7d2eabca35 spec: progressDeadlineSeconds: 600 replicas: 2 revisionHistoryLimit: 1 selector: matchLabels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager strategy: rollingUpdate: maxSurge: 25% maxUnavailable: 25% type: RollingUpdate template: metadata: annotations: alm-examples: |- [ { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediation", "metadata": { "name": "worker-1" }, "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } }, { "apiVersion": "fence-agents-remediation.medik8s.io/v1alpha1", "kind": "FenceAgentsRemediationTemplate", "metadata": { "name": "fenceagentsremediationtemplate-default" }, "spec": { "template": { "spec": { "agent": "fence_ipmilan", "nodeSecretNames": { "master-0": "fence-agents-credentials-master0", "master-1": "fence-agents-credentials-master1", "master-2": "fence-agents-credentials-master2", "worker-0": "fence-agents-credentials-worker0", "worker-1": "fence-agents-credentials-worker1", "worker-2": "fence-agents-credentials-worker2" }, "nodeparameters": { "--ipport": { "master-0": "6230", "master-1": "6231", "master-2": "6232", "worker-0": "6233", "worker-1": "6234", "worker-2": "6235" } }, "remediationStrategy": "ResourceDeletion", "retrycount": 5, "retryinterval": "5s", "sharedSecretName": "fence-agents-credentials-shared", "sharedparameters": { "--action": "reboot", "--ip": "192.168.111.1", "--lanplus": "", "--username": "admin" }, "timeout": "60s" } } } } ] capabilities: Basic Install categories: OpenShift Optional containerImage: "" createdAt: "2025-09-08 09:19:00" description: Fence Agents Remediation Operator uses well-known agents to fence and remediate unhealthy nodes. The remediation includes rebooting the unhealthy node using a fence agent, and then evicting workloads from the unhealthy node. features.operators.openshift.io/cnf: "false" features.operators.openshift.io/cni: "false" features.operators.openshift.io/csi: "false" features.operators.openshift.io/disconnected: "true" features.operators.openshift.io/fips-compliant: "true" features.operators.openshift.io/proxy-aware: "false" features.operators.openshift.io/tls-profiles: "false" features.operators.openshift.io/token-auth-aws: "false" features.operators.openshift.io/token-auth-azure: "false" features.operators.openshift.io/token-auth-gcp: "false" kubectl.kubernetes.io/default-container: manager olm.operatorGroup: openshift-workload-availability-4p9hd olm.operatorNamespace: openshift-workload-availability olm.skipRange: '>=0.5.0 <0.6.0' olm.targetNamespaces: "" olmcahash: 8472fc1923910ab028c90f2549a21f84b3857acd13e1da8db05eea6527458960 operatorframework.io/properties: '{"properties":[{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediation","version":"v1alpha1"}},{"type":"olm.gvk","value":{"group":"fence-agents-remediation.medik8s.io","kind":"FenceAgentsRemediationTemplate","version":"v1alpha1"}},{"type":"olm.package","value":{"packageName":"fence-agents-remediation","version":"0.6.0"}}]}' operatorframework.io/suggested-namespace: openshift-workload-availability operatorframework.io/suggested-namespace-template: '{"kind":"Namespace","apiVersion":"v1","metadata":{"name":"openshift-workload-availability","annotations":{"openshift.io/node-selector":""}}}' operators.openshift.io/valid-subscription: '["OpenShift Kubernetes Engine", "OpenShift Container Platform", "OpenShift Platform Plus"]' operators.operatorframework.io/builder: operator-sdk-v1.32.0 operators.operatorframework.io/project_layout: go.kubebuilder.io/v3 repository: https://github.com/medik8s/fence-agents-remediation support: Red Hat creationTimestamp: null labels: app.kubernetes.io/name: fence-agents-remediation-operator control-plane: controller-manager spec: affinity: podAntiAffinity: preferredDuringSchedulingIgnoredDuringExecution: - podAffinityTerm: labelSelector: matchExpressions: - key: control-plane operator: In values: - controller-manager - key: app.kubernetes.io/name operator: In values: - fence-agents-remediation-operator topologyKey: kubernetes.io/hostname weight: 100 containers: - args: - --secure-listen-address=0.0.0.0:8443 - --http2-disable - --upstream=http://127.0.0.1:8080/ - --logtostderr=true - --v=0 env: - name: OPERATOR_CONDITION_NAME value: fence-agents-remediation.v0.6.0 image: registry.redhat.io/openshift4/ose-kube-rbac-proxy-rhel9@sha256:d37a6d10b0fa07370066a31fdaffe2ea553faf4e4e98be7fcef5ec40d62ffe29 imagePullPolicy: IfNotPresent name: kube-rbac-proxy ports: - containerPort: 8443 name: https protocol: TCP resources: limits: cpu: 500m memory: 128Mi requests: cpu: 5m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert - args: - --health-probe-bind-address=:8081 - --metrics-bind-address=127.0.0.1:8080 - --leader-elect command: - /manager env: - name: DEPLOYMENT_NAMESPACE valueFrom: fieldRef: apiVersion: v1 fieldPath: metadata.namespace - name: OPERATOR_CONDITION_NAME value: fence-agents-remediation.v0.6.0 image: registry.redhat.io/workload-availability/fence-agents-remediation-rhel9-operator@sha256:eef1a298718650a22a3da4a07c140ce21b6f287d87af79801610317fa8f63d52 imagePullPolicy: IfNotPresent livenessProbe: failureThreshold: 3 httpGet: path: /healthz port: 8081 scheme: HTTP initialDelaySeconds: 15 periodSeconds: 20 successThreshold: 1 timeoutSeconds: 1 name: manager readinessProbe: failureThreshold: 3 httpGet: path: /readyz port: 8081 scheme: HTTP initialDelaySeconds: 5 periodSeconds: 10 successThreshold: 1 timeoutSeconds: 1 resources: limits: cpu: 500m memory: 512Mi requests: cpu: 10m memory: 64Mi securityContext: allowPrivilegeEscalation: false capabilities: drop: - ALL terminationMessagePath: /dev/termination-log terminationMessagePolicy: File volumeMounts: - mountPath: /apiserver.local.config/certificates name: apiservice-cert - mountPath: /tmp/k8s-webhook-server/serving-certs name: webhook-cert dnsPolicy: ClusterFirst priorityClassName: system-cluster-critical restartPolicy: Always schedulerName: default-scheduler securityContext: runAsNonRoot: true seccompProfile: type: RuntimeDefault serviceAccount: fence-agents-remediation-controller-manager serviceAccountName: fence-agents-remediation-controller-manager terminationGracePeriodSeconds: 10 volumes: - name: apiservice-cert secret: defaultMode: 420 items: - key: tls.crt path: apiserver.crt - key: tls.key path: apiserver.key secretName: fence-agents-remediation-controller-manager-service-cert - name: webhook-cert secret: defaultMode: 420 items: - key: tls.crt path: tls.crt - key: tls.key path: tls.key secretName: fence-agents-remediation-controller-manager-service-cert status: availableReplicas: 2 conditions: - lastTransitionTime: "2025-09-09T10:16:04Z" lastUpdateTime: "2025-09-09T10:16:25Z" message: ReplicaSet "fence-agents-remediation-controller-manager-5f76bb6467" has successfully progressed. reason: NewReplicaSetAvailable status: "True" type: Progressing - lastTransitionTime: "2025-09-09T10:37:15Z" lastUpdateTime: "2025-09-09T10:37:15Z" message: Deployment has minimum availability. reason: MinimumReplicasAvailable status: "True" type: Available observedGeneration: 2 readyReplicas: 2 replicas: 2 updatedReplicas: 2 kind: List metadata: resourceVersion: ""