============================================================================================================================ [root@cert-rhosp-02 ~]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.21.0-0.nightly-2025-12-22-170804 True False 6d12h Cluster version is 4.21.0-0.nightly-2025-12-22-170804 ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get csv NAME DISPLAY VERSION REPLACES PHASE fence-agents-remediation.v0.6.0 Fence Agents Remediation Operator 0.6.0 fence-agents-remediation.v0.5.1 Succeeded machine-deletion-remediation.v0.5.0 Machine Deletion Remediation operator 0.5.0 machine-deletion-remediation.v0.4.1 Succeeded node-healthcheck-operator.v0.10.1 Node Health Check Operator 0.10.1 node-healthcheck-operator.v0.10.0 Succeeded node-maintenance-operator.v5.5.0 Node Maintenance Operator 5.5.0 node-maintenance-operator.v5.4.1 Succeeded self-node-remediation.v0.11.0 Self Node Remediation Operator 0.11.0 self-node-remediation.v0.10.2 Succeeded ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get pods NAME READY STATUS RESTARTS AGE fence-agents-remediation-controller-manager-74bfb5d6bd-2fvcp 0/2 ContainerCreating 0 6s fence-agents-remediation-controller-manager-74bfb5d6bd-bqlgd 1/2 Running 0 6s machine-deletion-remediation-controller-manager-649b8f7d4dzxcqw 1/2 Running 0 6s node-healthcheck-controller-manager-6d9f47cbbb-8rd7n 1/2 Running 0 6s node-healthcheck-controller-manager-6d9f47cbbb-gfps4 1/2 Running 0 6s node-healthcheck-node-remediation-console-plugin-7c8589484mg6fm 0/1 ContainerCreating 0 6s node-maintenance-operator-controller-manager-855c67587f-pcf2d 0/1 ContainerCreating 0 6s self-node-remediation-controller-manager-57c976f744-m4xfw 1/2 Running 0 6s self-node-remediation-controller-manager-57c976f744-m77c5 1/2 Running 0 6s self-node-remediation-ds-5hp9f 1/1 Running 0 6h30m self-node-remediation-ds-9pcg9 1/1 Running 0 6h30m self-node-remediation-ds-grtnv 1/1 Running 0 6h30m self-node-remediation-ds-k7dcz 1/1 Running 0 63m self-node-remediation-ds-nh5jb 1/1 Running 0 6h30m self-node-remediation-ds-t4bpw 1/1 Running 0 6h30m --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: far-template-short-timeout namespace: openshift-workload-availability spec: template: spec: agent: fence_ipmilan retrycount: 1 retryinterval: 1s timeout: 1s # Very short timeout to ensure it fails quickly nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 '--password': password '--username': admin ============================================================================================================================ [root@cert-rhosp-02 ~]# cat test.yaml --- apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate metadata: name: far-template-short-timeout namespace: openshift-workload-availability spec: template: spec: agent: fence_ipmilan retrycount: 1 retryinterval: 1s timeout: 1s # Very short timeout to ensure it fails quickly nodeparameters: '--ipport': master-0-0: '6230' master-0-1: '6231' master-0-2: '6232' worker-0-0: '6233' worker-0-1: '6234' worker-0-2: '6235' sharedparameters: '--action': reboot '--lanplus': '' '--ip': 192.168.123.1 '--password': password '--username': admin --- apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-workload-availability name: snr-template-short-timeout spec: template: spec: remediationStrategy: Automatic --- apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-test-taint-cleanup spec: minHealthy: 30% healthyDelay: 5s escalatingRemediations: - remediationTemplate: apiVersion: fence-agents-remediation.medik8s.io/v1alpha1 kind: FenceAgentsRemediationTemplate name: far-template-short-timeout namespace: openshift-workload-availability order: 0 timeout: 60s - remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: snr-template-short-timeout namespace: openshift-workload-availability order: 1 timeout: 60s selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - duration: 30s status: "False" type: Ready - duration: 30s status: Unknown type: Ready ============================================================================================================================ [root@cert-rhosp-02 ~]# oc apply -f test.yaml fenceagentsremediationtemplate.fence-agents-remediation.medik8s.io/far-template-short-timeout created selfnoderemediationtemplate.self-node-remediation.medik8s.io/snr-template-short-timeout created nodehealthcheck.remediation.medik8s.io/nhc-test-taint-cleanup created ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get nodes NAME STATUS ROLES AGE VERSION master-0-0 Ready control-plane,master 7d v1.34.2 master-0-1 Ready control-plane,master 7d v1.34.2 master-0-2 Ready control-plane,master 7d v1.34.2 worker-0-0 Ready worker 6d23h v1.34.2 worker-0-1 Ready worker 74m v1.34.2 worker-0-2 Ready worker 6d23h v1.34.2 ============================================================================================================================ [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep fence-agents-remediation-controller-manager) ============================================================================================================================ [root@cert-rhosp-02 ~]# echo $PODS pod/fence-agents-remediation-controller-manager-74bfb5d6bd-2fvcp pod/fence-agents-remediation-controller-manager-74bfb5d6bd-bqlgd ============================================================================================================================ [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/fence-agents-remediation-controller-manager-74bfb5d6bd-2fvcp "worker-0-1" == pod/fence-agents-remediation-controller-manager-74bfb5d6bd-bqlgd "worker-0-0" ============================================================================================================================ [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep node-healthcheck-controller-manager) ============================================================================================================================ [root@cert-rhosp-02 ~]# echo $PODS pod/node-healthcheck-controller-manager-6d9f47cbbb-8rd7n pod/node-healthcheck-controller-manager-6d9f47cbbb-gfps4 ============================================================================================================================ [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/node-healthcheck-controller-manager-6d9f47cbbb-8rd7n "master-0-1" == pod/node-healthcheck-controller-manager-6d9f47cbbb-gfps4 "master-0-2" ============================================================================================================================ [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep self-node-remediation-controller-manager) ============================================================================================================================ [root@cert-rhosp-02 ~]# echo $PODS pod/self-node-remediation-controller-manager-57c976f744-m4xfw pod/self-node-remediation-controller-manager-57c976f744-m77c5 ============================================================================================================================ [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/self-node-remediation-controller-manager-57c976f744-m4xfw "worker-0-0" == pod/self-node-remediation-controller-manager-57c976f744-m77c5 "worker-0-2" ============================================================================================================================ [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep machine-deletion-remediation-controller) ============================================================================================================================ [root@cert-rhosp-02 ~]# echo $PODS pod/machine-deletion-remediation-controller-manager-649b8f7d4dzxcqw ============================================================================================================================ [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/machine-deletion-remediation-controller-manager-649b8f7d4dzxcqw "worker-0-2" ============================================================================================================================ [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep node-maintenance-operator-controller-manager) ============================================================================================================================ [root@cert-rhosp-02 ~]# echo $PODS pod/node-maintenance-operator-controller-manager-855c67587f-pcf2d ============================================================================================================================ [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/node-maintenance-operator-controller-manager-855c67587f-pcf2d "master-0-1" ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get nodes NAME STATUS ROLES AGE VERSION master-0-0 Ready control-plane,master 7d v1.34.2 master-0-1 Ready control-plane,master 7d v1.34.2 master-0-2 Ready control-plane,master 7d v1.34.2 worker-0-0 Ready worker 6d23h v1.34.2 worker-0-1 Ready worker 77m v1.34.2 worker-0-2 NotReady worker 6d23h v1.34.2 ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get nhc ; oc get snr; oc get far NAME AGE nhc-test-taint-cleanup 5m20s NAME AGE worker-0-2-f8g7w 48s NAME AGE worker-0-2-mb2sz 50s ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get node worker-0-2 -o jsonpath='{.spec.taints}' | jq '.' [ { "effect": "NoSchedule", "key": "node.kubernetes.io/unreachable", "timeAdded": "2025-12-30T20:40:48Z" }, { "effect": "NoExecute", "key": "node.kubernetes.io/unreachable", "timeAdded": "2025-12-30T20:40:48Z" }, { "effect": "NoExecute", "key": "medik8s.io/fence-agents-remediation", "timeAdded": "2025-12-30T20:41:19Z" }, { "effect": "NoExecute", "key": "medik8s.io/remediation", "timeAdded": "2025-12-30T20:41:21Z", "value": "self-node-remediation" }, { "effect": "NoSchedule", "key": "node.kubernetes.io/unschedulable", "timeAdded": "2025-12-30T20:41:21Z" } ] ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get node worker-0-2 -o jsonpath='{.spec.taints}' | jq '.[] | select(.key == "medik8s.io/fence-agents-remediation" or .key == "medik8s.io/remediation")' { "effect": "NoExecute", "key": "medik8s.io/fence-agents-remediation", "timeAdded": "2025-12-30T20:41:19Z" } { "effect": "NoExecute", "key": "medik8s.io/remediation", "timeAdded": "2025-12-30T20:41:21Z", "value": "self-node-remediation" } ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get events --field-selector involvedObject.name=nhc-test-taint-cleanup | grep -i "failed\|cleanup" No resources found in openshift-workload-availability namespace. ============================================================================================================================ [root@cert-rhosp-02 ~]# oc get node worker-0-2 -o jsonpath='{.metadata.annotations}' | jq '.' | grep -i remediation "is-reboot-capable.self-node-remediation.medik8s.io": "true", "self-node-remediation.medik8s.io/watchdog-timeout": "0", NHC Logs: ============================================================================================================================ [root@cert-rhosp-02 ~]# oc logs node-healthcheck-controller-manager-6d9f47cbbb-8rd7n 2025-12-30T20:26:42.938387515Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-12-30T20:26:42.938517243Z INFO setup Go OS/Arch: linux/amd64 2025-12-30T20:26:42.938521076Z INFO setup Operator Version: 2ef588d 2025-12-30T20:26:42.93852323Z INFO setup Git Commit: 2ef588d65de4087c46447401cd6e757459d79210 2025-12-30T20:26:42.938525328Z INFO setup Build Date: 2025-11-11T13:40:45+00:00 2025-12-30T20:26:42.938527441Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-12-30T20:26:42.967996705Z INFO setup supported control plane topology {"topology": "HighlyAvailable"} 2025-12-30T20:26:42.968044609Z INFO setup Cluster capabilities {"IsOnOpenshift": true, "HasMachineAPI": true} 2025-12-30T20:26:42.968414635Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "remediation.medik8s.io/v1alpha1, Kind=NodeHealthCheck", "path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-12-30T20:26:42.96852303Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2025-12-30T20:26:42.96857188Z INFO setup starting manager 2025-12-30T20:26:42.96862399Z INFO controller-runtime.metrics Starting metrics server 2025-12-30T20:26:42.96870545Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-12-30T20:26:42.968715661Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2025-12-30T20:26:42.96874961Z INFO controller-runtime.webhook Starting webhook server 2025-12-30T20:26:42.969102449Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-12-30T20:26:42.969215167Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-12-30T20:26:42.969402043Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I1230 20:26:43.069691 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/e1f13584.medik8s.io... I1230 20:26:58.924207 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/e1f13584.medik8s.io 2025-12-30T20:26:58.924240887Z DEBUG events node-healthcheck-controller-manager-6d9f47cbbb-8rd7n_39f80cf5-2c6d-4df5-8e63-35e700ce6eca became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"e1f13584.medik8s.io","uid":"ba5e9ac2-85d9-4886-81f8-d83c5442ac35","apiVersion":"coordination.k8s.io/v1","resourceVersion":"3658482"}, "reason": "LeaderElection"} 2025-12-30T20:26:58.924588516Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "channel source: 0xc0002f0770"} 2025-12-30T20:26:58.924628052Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1alpha1.NodeHealthCheck"} 2025-12-30T20:26:58.924659301Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1.Node"} 2025-12-30T20:26:58.924672744Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.MachineHealthCheck"} 2025-12-30T20:26:58.924646363Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "channel source: 0xc0002f0af0"} 2025-12-30T20:26:58.924675731Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1.Node"} 2025-12-30T20:26:58.924692204Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.Machine"} I1230 20:26:58.925599 1 shared_informer.go:313] Waiting for caches to sync for feature gate accessor 2025-12-30T20:26:59.025120145Z INFO MHCChecker found termination handler MHC, will ignore Nodes with Terminating condition 2025-12-30T20:26:59.025163925Z INFO MHCChecker MHC Checker status changed, notifying NHC controller 2025-12-30T20:26:59.025248497Z INFO adding all NHCs to reconcile queue for handling MHC event 2025-12-30T20:26:59.025272352Z INFO Starting Controller {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck"} 2025-12-30T20:26:59.025292438Z INFO Starting workers {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "worker count": 1} 2025-12-30T20:26:59.02538062Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} I1230 20:26:59.025672 1 shared_informer.go:320] Caches are synced for feature gate accessor I1230 20:26:59.025708 1 simple_featuregate_reader.go:171] Starting feature-gate-detector 2025-12-30T20:26:59.025807564Z INFO FeatureGateAccessor FeatureGates initialized I1230 20:26:59.025903 1 recorder_logging.go:49] &Event{ObjectMeta:{dummy.1886181d8f3de39c.4b2a179c dummy 0 0001-01-01 00:00:00 +0000 UTC map[] map[] [] [] []},InvolvedObject:ObjectReference{Kind:Pod,Namespace:dummy,Name:dummy,UID:,APIVersion:v1,ResourceVersion:,FieldPath:,},Reason:FeatureGatesInitialized,Message:FeatureGates updated to featuregates.Features{Enabled:[]v1.FeatureGateName{"AdditionalRoutingCapabilities", "AdminNetworkPolicy", "AlibabaPlatform", "AzureWorkloadIdentity", "BuildCSIVolumes", "CPMSMachineNamePrefix", "ConsolePluginContentSecurityPolicy", "ExternalOIDC", "ExternalOIDCWithUIDAndExtraClaimMappings", "GCPClusterHostedDNSInstall", "GatewayAPI", "GatewayAPIController", "HighlyAvailableArbiter", "ImageStreamImportMode", "ImageVolume", "KMSv1", "MachineConfigNodes", "ManagedBootImages", "ManagedBootImagesAWS", "ManagedBootImagesAzure", "ManagedBootImagesvSphere", "MetricsCollectionProfiles", "NetworkDiagnosticsConfig", "NetworkLiveMigration", "NetworkSegmentation", "NewOLM", "NewOLMWebhookProviderOpenshiftServiceCA", "OpenShiftPodSecurityAdmission", "PinnedImages", "PreconfiguredUDNAddresses", "ProcMountType", "RouteAdvertisements", "RouteExternalCertificate", "ServiceAccountTokenNodeBinding", "SigstoreImageVerification", "SigstoreImageVerificationPKI", "StoragePerformantSecurityPolicy", "UpgradeStatus", "UserNamespacesPodSecurityStandards", "UserNamespacesSupport", "VSphereMultiDisk", "VSphereMultiNetworks", "VolumeAttributesClass"}, Disabled:[]v1.FeatureGateName{"AWSClusterHostedDNS", "AWSClusterHostedDNSInstall", "AWSDedicatedHosts", "AWSDualStackInstall", "AWSServiceLBNetworkSecurityGroup", "AutomatedEtcdBackup", "AzureClusterHostedDNSInstall", "AzureDedicatedHosts", "AzureDualStackInstall", "AzureMultiDisk", "BootImageSkewEnforcement", "BootcNodeManagement", "CBORServingAndStorage", "CRDCompatibilityRequirementOperator", "ClientsAllowCBOR", "ClientsPreferCBOR", "ClusterAPIInstall", "ClusterAPIInstallIBMCloud", "ClusterAPIMachineManagement", "ClusterAPIMachineManagementVSphere", "ClusterMonitoringConfig", "ClusterVersionOperatorConfiguration", "DNSNameResolver", "DualReplica", "DyanmicServiceEndpointIBMCloud", "EtcdBackendQuota", "EventTTL", "EventedPLEG", "Example", "Example2", "ExternalSnapshotMetadata", "GCPClusterHostedDNS", "GCPCustomAPIEndpoints", "GCPCustomAPIEndpointsInstall", "GCPDualStackInstall", "HyperShiftOnlyDynamicResourceAllocation", "ImageModeStatusReporting", "IngressControllerDynamicConfigurationManager", "InsightsConfig", "InsightsOnDemandDataGather", "IrreconcilableMachineConfig", "KMSEncryptionProvider", "MachineAPIMigration", "MachineAPIOperatorDisableMachineHealthCheckController", "ManagedBootImagesCPMS", "MaxUnavailableStatefulSet", "MinimumKubeletVersion", "MixedCPUsAllocation", "MultiArchInstallAzure", "MultiDiskSetup", "MutableCSINodeAllocatableCount", "MutatingAdmissionPolicy", "NewOLMBoxCutterRuntime", "NewOLMCatalogdAPIV1Metas", "NewOLMOwnSingleNamespace", "NewOLMPreflightPermissionChecks", "NoRegistryClusterInstall", "NutanixMultiSubnets", "OSStreams", "OVNObservability", "OnPremDNSRecords", "ProvisioningRequestAvailable", "SELinuxMount", "ShortCertRotation", "SignatureStores", "TranslateStreamCloseWebsocketRequests", "VSphereConfigurableMaxAllowedBlockVolumesPerNode", "VSphereHostVMGroupZonal", "VSphereMixedNodeEnv", "VolumeGroupSnapshot"}},Source:EventSource{Component:,Host:,},FirstTimestamp:2025-12-30 20:26:59.025814428 +0000 UTC m=+16.120150060,LastTimestamp:2025-12-30 20:26:59.025814428 +0000 UTC m=+16.120150060,Count:1,Type:Normal,EventTime:0001-01-01 00:00:00 +0000 UTC,Series:nil,Action:,Related:nil,ReportingController:,ReportingInstance:,} 2025-12-30T20:26:59.033580143Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-12-30T20:26:59.033620552Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "SelfNodeRemediationTemplate"} 2025-12-30T20:26:59.033629028Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-12-30T20:26:59.033634058Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "SelfNodeRemediation"} 2025-12-30T20:26:59.033640162Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-12-30T20:26:59.03364476Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "FenceAgentsRemediationTemplate"} 2025-12-30T20:26:59.033649248Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-12-30T20:26:59.03365256Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "FenceAgentsRemediation"} 2025-12-30T20:26:59.03365766Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-12-30T20:26:59.033660482Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "MachineDeletionRemediationTemplate"} 2025-12-30T20:26:59.033664471Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2025-12-30T20:26:59.033667336Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "MachineDeletionRemediation"} 2025-12-30T20:26:59.04560648Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-0"} time="2025-12-30T20:26:59Z" level=info msg="invalidating lease" time="2025-12-30T20:26:59Z" level=info msg="getting lease" 2025-12-30T20:26:59.125774121Z INFO Starting Controller {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck"} 2025-12-30T20:26:59.12580821Z INFO Starting workers {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "worker count": 1} 2025-12-30T20:26:59.150878661Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-1"} time="2025-12-30T20:26:59Z" level=info msg="invalidating lease" time="2025-12-30T20:26:59Z" level=info msg="getting lease" 2025-12-30T20:26:59.158049299Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-mdr-snr", "node": "worker-0-2"} time="2025-12-30T20:26:59Z" level=info msg="invalidating lease" time="2025-12-30T20:26:59Z" level=info msg="getting lease" 2025-12-30T20:26:59.163091068Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-12-30T20:26:59.336864984Z INFO console-plugin successfully created / updated console plugin resources 2025-12-30T20:34:44.244799676Z INFO controllers.NodeHealthCheck.WatchManager adding NHC to reconcile queue for handling remediation template {"template": "far-template", "NHC": "nhc-mdr-snr"} 2025-12-30T20:34:44.244902316Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:34:44.251071161Z INFO controllers.NodeHealthCheck disabling NHC {"NodeHealthCheck name": "nhc-mdr-snr", "reason": "RemediationTemplateNotFound", "message": "Remediation template not found: \"failed to get external remediation template openshift-workload-availability/far-template: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io \\\"far-template\\\" not found\""} 2025-12-30T20:34:44.251234005Z DEBUG events [remediation] Disabling NHC. Reason: RemediationTemplateNotFound, Message: Remediation template not found: "failed to get external remediation template openshift-workload-availability/far-template: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io \"far-template\" not found" {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-mdr-snr","uid":"054d5047-6762-4082-b08d-a52405f32d1e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3631657"}, "reason": "Disabled"} 2025-12-30T20:34:44.251396624Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-mdr-snr", "new status": {"observedNodes":0,"healthyNodes":0,"conditions":[{"type":"Disabled","status":"True","lastTransitionTime":"2025-12-30T20:34:44Z","reason":"RemediationTemplateNotFound","message":"Remediation template not found: \"failed to get external remediation template openshift-workload-availability/far-template: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io \\\"far-template\\\" not found\""}],"phase":"Disabled","reason":"NHC is disabled: RemediationTemplateNotFound: Remediation template not found: \"failed to get external remediation template openshift-workload-availability/far-template: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io \\\"far-template\\\" not found\"","lastUpdateTime":"2025-12-30T19:23:32Z"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2025-12-30T20:34:44Z\",\"message\":\"Remediation template not found: \\\"failed to get external remediation template openshift-workload-availability/far-template: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io \\\\\\\"far-template\\\\\\\" not found\\\"\",\"reason\":\"RemediationTemplateNotFound\",\"status\":\"True\",\"type\":\"Disabled\"}],\"healthyNodes\":0,\"observedNodes\":0,\"phase\":\"Disabled\",\"reason\":\"NHC is disabled: RemediationTemplateNotFound: Remediation template not found: \\\"failed to get external remediation template openshift-workload-availability/far-template: fenceagentsremediationtemplates.fence-agents-remediation.medik8s.io \\\\\\\"far-template\\\\\\\" not found\\\"\"}}"} 2025-12-30T20:34:44.456801845Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:34:59.457132879Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:34:59.463637955Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:35:14.464188516Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:35:14.471376044Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:35:15.954952313Z INFO controllers.NodeHealthCheck.WatchManager adding NHC to reconcile queue for handling remediation template {"template": "mdr-template", "NHC": "nhc-mdr-snr"} 2025-12-30T20:35:15.955031286Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:35:15.961249026Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:35:29.471838482Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:35:29.478183974Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:35:44.478973367Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:35:44.487396652Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:35:59.48828814Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:35:59.496008424Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:36:00.934593568Z INFO controllers.NodeHealthCheck.WatchManager adding NHC to reconcile queue for handling remediation template {"template": "selfnoderemediationtemplate-sample", "NHC": "nhc-mdr-snr"} 2025-12-30T20:36:00.934694232Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:36:00.938450393Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:36:14.49619671Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:36:14.500579316Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-mdr-snr", "error": null, "requeue": false, "requeuAfter": "15s"} 2025-12-30T20:36:24.682536913Z INFO nodehealthcheck-resource validate delete {"name": "nhc-mdr-snr"} 2025-12-30T20:36:24.685861451Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:36:24.685888921Z INFO controllers.NodeHealthCheck NodeHealthCheck CR not found {"NodeHealthCheck name": "nhc-mdr-snr", "name": "nhc-mdr-snr"} 2025-12-30T20:36:29.501668227Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-mdr-snr"} 2025-12-30T20:36:29.501704148Z INFO controllers.NodeHealthCheck NodeHealthCheck CR not found {"NodeHealthCheck name": "nhc-mdr-snr", "name": "nhc-mdr-snr"} 2025-12-30T20:36:49.16160802Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:36:49.168354962Z INFO controllers.NodeHealthCheck enabling NHC, valid config, no conflicting MHC configured in the cluster {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:36:49.168478755Z DEBUG events [remediation] No issues found, NodeHealthCheck is enabled. {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3662473"}, "reason": "Enabled"} 2025-12-30T20:36:49.175009233Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:36:49Z" level=info msg="invalidating lease" time="2025-12-30T20:36:49Z" level=info msg="getting lease" 2025-12-30T20:36:49.178036248Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:36:49Z" level=info msg="invalidating lease" time="2025-12-30T20:36:49Z" level=info msg="getting lease" 2025-12-30T20:36:49.180945242Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} time="2025-12-30T20:36:49Z" level=info msg="invalidating lease" time="2025-12-30T20:36:49Z" level=info msg="getting lease" 2025-12-30T20:36:49.184029647Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-test-taint-cleanup", "new status": {"observedNodes":3,"healthyNodes":3,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-12-30T20:36:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2025-12-30T20:36:49Z\",\"message\":\"No issues found, NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"}],\"healthyNodes\":3,\"observedNodes\":3,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\"}}"} 2025-12-30T20:36:49.389914807Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "0s"} 2025-12-30T20:40:48.34069386Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-test-taint-cleanup"} 2025-12-30T20:40:48.340787853Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0-2", "NHC": "nhc-test-taint-cleanup"} 2025-12-30T20:40:48.340853025Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:40:48.349485072Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.650518005s"} 2025-12-30T20:40:48.357398534Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:40:48Z" level=info msg="invalidating lease" time="2025-12-30T20:40:48Z" level=info msg="getting lease" 2025-12-30T20:40:48.461828584Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:40:48Z" level=info msg="invalidating lease" time="2025-12-30T20:40:48Z" level=info msg="getting lease" 2025-12-30T20:40:48.561805758Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-test-taint-cleanup", "new status": {"observedNodes":3,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-12-30T20:36:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2025-12-30T20:36:49Z"}, "patch": "{\"status\":{\"healthyNodes\":2}}"} 2025-12-30T20:40:48.770499358Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "30.650518005s"} 2025-12-30T20:41:19.422232586Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:19.428883187Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:19.429051893Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0-2", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3664069"}, "reason": "DetectedUnhealthy"} 2025-12-30T20:41:19.436463313Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:19Z" level=info msg="invalidating lease" time="2025-12-30T20:41:19Z" level=info msg="getting lease" 2025-12-30T20:41:19.439395414Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:19Z" level=info msg="invalidating lease" time="2025-12-30T20:41:19Z" level=info msg="getting lease" 2025-12-30T20:41:19.442282275Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:19.445904937Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-2"} time="2025-12-30T20:41:19Z" level=info msg="request lease" time="2025-12-30T20:41:19Z" level=info msg="getting lease" time="2025-12-30T20:41:19Z" level=info msg="create lease" 2025-12-30T20:41:19.448824237Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} 2025-12-30T20:41:19.457670236Z DEBUG events [remediation] Created remediation object for node worker-0-2 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3664069"}, "reason": "RemediationCreated"} 2025-12-30T20:41:19.537139824Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-test-taint-cleanup", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-12-30T20:41:19Z","templateName":"far-template-short-timeout"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-12-30T20:36:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-12-30T20:40:48Z"}, "patch": "{\"status\":{\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-mb2sz\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"96b52e5c-0cc8-4bbb-a425-7a20717b5067\"},\"started\":\"2025-12-30T20:41:19Z\",\"templateName\":\"far-template-short-timeout\"}]}]}}"} 2025-12-30T20:41:19.570068281Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:19.570204508Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:19.570227879Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:19.570243411Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:19.578044547Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:19.578099497Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:19.57811067Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:19.57811546Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:19.743089127Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "1m0s"} 2025-12-30T20:41:19.743279563Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:19.751719405Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:19.759618083Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:19Z"} 2025-12-30T20:41:19.759655974Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:19Z" level=info msg="invalidating lease" time="2025-12-30T20:41:19Z" level=info msg="getting lease" 2025-12-30T20:41:19.763624764Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:19Z" level=info msg="invalidating lease" time="2025-12-30T20:41:19Z" level=info msg="getting lease" 2025-12-30T20:41:19.76744979Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:19.7715201Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists{"CR name": "worker-0-2-mb2sz", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-12-30T20:41:19Z" level=info msg="getting lease" 2025-12-30T20:41:19.771597502Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-test-taint-cleanup", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-12-30T20:41:19.771629819Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:19Z"} 2025-12-30T20:41:19.775118239Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "59.228375828s"} 2025-12-30T20:41:20.997662283Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:20.997701967Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:20.997717068Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:20.997723574Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:20.997781253Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:21.005332638Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:21.01214124Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:21.01219204Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:21Z" level=info msg="invalidating lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.016508954Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:21Z" level=info msg="invalidating lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.02045011Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:21.025748331Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists{"CR name": "worker-0-2-mb2sz", "kind": "FenceAgentsRemediation", "namespace": "openshift-workload-availability"} time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.025836714Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-test-taint-cleanup", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-12-30T20:41:21.025862759Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:21.025874857Z INFO controllers.NodeHealthCheck remediation failed {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:21.025881975Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:21.033110559Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.033141238Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:21.033152906Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.033156854Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-mb2sz", "Remediation CR Kind": "FenceAgentsRemediation"} 2025-12-30T20:41:21.113560358Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-test-taint-cleanup", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-12-30T20:41:19Z","timedOut":"2025-12-30T20:41:21Z","templateName":"far-template-short-timeout"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-12-30T20:36:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-12-30T20:41:19Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-mb2sz\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"96b52e5c-0cc8-4bbb-a425-7a20717b5067\"},\"started\":\"2025-12-30T20:41:19Z\",\"templateName\":\"far-template-short-timeout\",\"timedOut\":\"2025-12-30T20:41:21Z\"}]}]}}"} 2025-12-30T20:41:21.320861007Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-12-30T20:41:21.320999091Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:21.328717453Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:21.336362005Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:21.336471593Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:21Z" level=info msg="invalidating lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.340706752Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:21Z" level=info msg="invalidating lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.344710865Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:21.349116155Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0-2"} time="2025-12-30T20:41:21Z" level=info msg="request lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" time="2025-12-30T20:41:21Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-test-taint-cleanup setAcquireTime=false" 2025-12-30T20:41:21.352772873Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2025-12-30T20:41:21.363380664Z DEBUG events [remediation] Created remediation object for node worker-0-2 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3665190"}, "reason": "RemediationCreated"} 2025-12-30T20:41:21.437374958Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-test-taint-cleanup", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-12-30T20:41:19Z","timedOut":"2025-12-30T20:41:21Z","templateName":"far-template-short-timeout"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-12-30T20:41:21Z","templateName":"snr-template-short-timeout"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-12-30T20:36:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-12-30T20:41:21Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-mb2sz\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"96b52e5c-0cc8-4bbb-a425-7a20717b5067\"},\"started\":\"2025-12-30T20:41:19Z\",\"templateName\":\"far-template-short-timeout\",\"timedOut\":\"2025-12-30T20:41:21Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-2-f8g7w\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"4797751d-233c-4314-a4ec-c677f3696daf\"},\"started\":\"2025-12-30T20:41:21Z\",\"templateName\":\"snr-template-short-timeout\"}]}]}}"} 2025-12-30T20:41:21.573542933Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.573590713Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:21.573602727Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.573606816Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:21.578519382Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.578571566Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:21.578584014Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.578589767Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:21.603677443Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.603716558Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:21.60372808Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:21.603731973Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:21.644815011Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "1m0s"} 2025-12-30T20:41:21.644911399Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:21.653167839Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:21.665581357Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:21.665623998Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:41:21.665630288Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:21Z" level=info msg="invalidating lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.767283041Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:21Z" level=info msg="invalidating lease" time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.771550415Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:21.775191167Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists{"CR name": "worker-0-2-f8g7w", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-12-30T20:41:21Z" level=info msg="getting lease" 2025-12-30T20:41:21.775256153Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-test-taint-cleanup", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-12-30T20:41:21.775281975Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:41:21.779482736Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "59.224721976s"} 2025-12-30T20:41:22.322257372Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:22.329540514Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:22.336812215Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:22.336919935Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:41:22.336937614Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:22Z" level=info msg="invalidating lease" time="2025-12-30T20:41:22Z" level=info msg="getting lease" 2025-12-30T20:41:22.341359619Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:22Z" level=info msg="invalidating lease" time="2025-12-30T20:41:22Z" level=info msg="getting lease" 2025-12-30T20:41:22.345627459Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:22.34930954Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists{"CR name": "worker-0-2-f8g7w", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-12-30T20:41:22Z" level=info msg="getting lease" 2025-12-30T20:41:22.349387615Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-test-taint-cleanup", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-12-30T20:41:22.34943982Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:41:22.353156675Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "58.650563428s"} 2025-12-30T20:41:22.611712661Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:22.611757321Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:22.611775336Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:41:22.611782258Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:41:22.611827599Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:41:22.618866005Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:41:22.624970426Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:22.625000948Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:41:22.625006646Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:41:22Z" level=info msg="invalidating lease" time="2025-12-30T20:41:22Z" level=info msg="getting lease" 2025-12-30T20:41:22.635270538Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:41:22Z" level=info msg="invalidating lease" time="2025-12-30T20:41:22Z" level=info msg="getting lease" 2025-12-30T20:41:22.639351719Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:41:22.64355438Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists{"CR name": "worker-0-2-f8g7w", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-12-30T20:41:22Z" level=info msg="getting lease" 2025-12-30T20:41:22.643654175Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-test-taint-cleanup", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-12-30T20:41:22.64367836Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:41:22.648762073Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "58.356325335s"} 2025-12-30T20:42:21.003924547Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:42:21.01028845Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:42:21.016302749Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:42:21.016366957Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:42:21.01638417Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:42:21Z" level=info msg="invalidating lease" time="2025-12-30T20:42:21Z" level=info msg="getting lease" 2025-12-30T20:42:21.019807266Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:42:21Z" level=info msg="invalidating lease" time="2025-12-30T20:42:21Z" level=info msg="getting lease" 2025-12-30T20:42:21.023323405Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:42:21.026532689Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists{"CR name": "worker-0-2-f8g7w", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2025-12-30T20:42:21Z" level=info msg="getting lease" 2025-12-30T20:42:21.026625313Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-test-taint-cleanup", "lease name": "node-worker-0-2", "NHC is lease owner": true, "lease expiration time": "1m0s"} 2025-12-30T20:42:21.026658286Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:42:21.026675053Z INFO controllers.NodeHealthCheck remediation timed out {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:42:21.026687207Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:21Z"} 2025-12-30T20:42:21.033491378Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:42:21.03358705Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:42:21.033610292Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:42:21.033625027Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:42:21.039472404Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:42:21.039501246Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:42:21.039511717Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","controller":false}} 2025-12-30T20:42:21.039516467Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-test-taint-cleanup", "Remediation CR Name": "worker-0-2-f8g7w", "Remediation CR Kind": "SelfNodeRemediation"} 2025-12-30T20:42:21.118167396Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-test-taint-cleanup", "new status": {"observedNodes":3,"healthyNodes":2,"unhealthyNodes":[{"name":"worker-0-2","remediations":[{"resource":{"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1"},"started":"2025-12-30T20:41:19Z","timedOut":"2025-12-30T20:41:21Z","templateName":"far-template-short-timeout"},{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2025-12-30T20:41:21Z","timedOut":"2025-12-30T20:42:21Z","templateName":"snr-template-short-timeout"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2025-12-30T20:36:49Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2025-12-30T20:41:21Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-0-2\",\"remediations\":[{\"resource\":{\"apiVersion\":\"fence-agents-remediation.medik8s.io/v1alpha1\",\"kind\":\"FenceAgentsRemediation\",\"name\":\"worker-0-2-mb2sz\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"96b52e5c-0cc8-4bbb-a425-7a20717b5067\"},\"started\":\"2025-12-30T20:41:19Z\",\"templateName\":\"far-template-short-timeout\",\"timedOut\":\"2025-12-30T20:41:21Z\"},{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-2-f8g7w\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"4797751d-233c-4314-a4ec-c677f3696daf\"},\"started\":\"2025-12-30T20:41:21Z\",\"templateName\":\"snr-template-short-timeout\",\"timedOut\":\"2025-12-30T20:42:21Z\"}]}]}}"} 2025-12-30T20:42:21.324520094Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "1s"} 2025-12-30T20:42:21.324620576Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:42:21.331184351Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:42:21.337099863Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:42:21.337123382Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:42:21Z"} 2025-12-30T20:42:21.337128985Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:42:21Z" level=info msg="invalidating lease" time="2025-12-30T20:42:21Z" level=info msg="getting lease" 2025-12-30T20:42:21.340491711Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:42:21Z" level=info msg="invalidating lease" time="2025-12-30T20:42:21Z" level=info msg="getting lease" 2025-12-30T20:42:21.343772642Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:42:21.343786399Z ERROR controllers.NodeHealthCheck Remediation timed out, and no template left to try {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": "didn't find a template to use for NHC nhc-test-taint-cleanup and node worker-0-2"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).remediate /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:536 github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:365 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255 2025-12-30T20:42:21.343910945Z DEBUG events [remediation] Remediation timed out, and no template left to try. didn't find a template to use for NHC nhc-test-taint-cleanup and node worker-0-2 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3666024"}, "reason": "NoTemplateLeft"} 2025-12-30T20:42:21.438658738Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "47h59m57.561565164s"} 2025-12-30T20:42:22.324696157Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-test-taint-cleanup"} 2025-12-30T20:42:22.331642004Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0-2", "condition type": "Ready", "condition status": "Unknown"} 2025-12-30T20:42:22.337826933Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:41:20Z"} 2025-12-30T20:42:22.337852883Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-test-taint-cleanup", "type": "Succeeded", "status": "False", "reason": "", "message": "", "lastTransition": "2025-12-30T20:42:21Z"} 2025-12-30T20:42:22.337860026Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-0"} time="2025-12-30T20:42:22Z" level=info msg="invalidating lease" time="2025-12-30T20:42:22Z" level=info msg="getting lease" 2025-12-30T20:42:22.341260347Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-1"} time="2025-12-30T20:42:22Z" level=info msg="invalidating lease" time="2025-12-30T20:42:22Z" level=info msg="getting lease" 2025-12-30T20:42:22.344672336Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-test-taint-cleanup", "node": "worker-0-2"} 2025-12-30T20:42:22.344696519Z ERROR controllers.NodeHealthCheck Remediation timed out, and no template left to try {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": "didn't find a template to use for NHC nhc-test-taint-cleanup and node worker-0-2"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).remediate /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:536 github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:365 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:334 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:294 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:255 2025-12-30T20:42:22.344853133Z DEBUG events [remediation] Remediation timed out, and no template left to try. didn't find a template to use for NHC nhc-test-taint-cleanup and node worker-0-2 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-test-taint-cleanup","uid":"468874c9-1128-4a8a-a90e-c3909e2e2e3e","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"3666024"}, "reason": "NoTemplateLeft"} 2025-12-30T20:42:22.348406489Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-test-taint-cleanup", "error": null, "requeue": false, "requeuAfter": "47h59m56.651805496s"} SNR: ============================================================================================================================ [root@cert-rhosp-02 ~]# oc logs self-node-remediation-controller-manager-57c976f744-m4xfw Defaulted container "manager" out of: manager, kube-rbac-proxy 2025-12-30T20:26:42.767735767Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-12-30T20:26:42.767820493Z INFO setup Go OS/Arch: linux/amd64 2025-12-30T20:26:42.76782342Z INFO setup Operator Version: d80706e 2025-12-30T20:26:42.767825229Z INFO setup Git Commit: d80706e1c305b9b65e00dee1b7cb653dcb1705e7 2025-12-30T20:26:42.767827044Z INFO setup Build Date: 2025-11-03T07:49:14+00:00 2025-12-30T20:26:42.767828786Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-12-30T20:26:42.780834353Z INFO utils-taints out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 34} 2025-12-30T20:26:42.780871767Z INFO utils-taints out of service taint strategy {"isGA": true, "k8sMajorVersion": 1, "k8sMinorVersion": 34} 2025-12-30T20:26:42.780879977Z INFO setup Starting as a manager that installs the daemonset 2025-12-30T20:26:42.78089213Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig"} 2025-12-30T20:26:42.780934896Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-12-30T20:26:42.7810601Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-12-30T20:26:42.781185603Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-12-30T20:26:42.781229291Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-12-30T20:26:42.781255315Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-12-30T20:26:42.781295149Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-12-30T20:26:42.781324956Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation"} 2025-12-30T20:26:42.781350831Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-12-30T20:26:42.781391777Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-12-30T20:26:42.781468373Z INFO setup starting manager 2025-12-30T20:26:42.781658151Z INFO controller-runtime.metrics Starting metrics server 2025-12-30T20:26:42.781721071Z INFO controller-runtime.webhook Starting webhook server I1230 20:26:42.781793 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/547f6cb6.medik8s.io... 2025-12-30T20:26:42.781847954Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2025-12-30T20:26:42.781692276Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-12-30T20:26:42.781987063Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-12-30T20:26:42.782166239Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} 2025-12-30T20:26:42.782076049Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} I1230 20:26:57.872179 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/547f6cb6.medik8s.io 2025-12-30T20:26:57.872753013Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1alpha1.SelfNodeRemediationConfig"} 2025-12-30T20:26:57.872812959Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1.DaemonSet"} 2025-12-30T20:26:57.872874019Z INFO Starting Controller {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig"} 2025-12-30T20:26:57.873048914Z INFO Starting EventSource {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "source": "kind source: *v1alpha1.SelfNodeRemediation"} 2025-12-30T20:26:57.873083524Z INFO Starting Controller {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation"} 2025-12-30T20:26:57.872368024Z DEBUG events self-node-remediation-controller-manager-57c976f744-m4xfw_96a0a92d-89e6-4fcc-ba37-9f444480bdc2 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"547f6cb6.medik8s.io","uid":"f1b17f5d-a2c6-4566-b046-79074e3c6bc5","apiVersion":"coordination.k8s.io/v1","resourceVersion":"3658471"}, "reason": "LeaderElection"} 2025-12-30T20:26:57.881388121Z INFO selfnoderemediationconfig-resource validate create {"name": "self-node-remediation-config"} 2025-12-30T20:26:57.976714344Z INFO Starting workers {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "worker count": 1} 2025-12-30T20:26:57.976745441Z INFO Starting workers {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "worker count": 1} 2025-12-30T20:26:57.976909164Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-12-30T20:26:58.079784666Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-12-30T20:26:58.079836095Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-12-30T20:26:58.080572738Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/12/30 20:26:58 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/12/30 20:26:58 update was successful 2025-12-30T20:26:58.088627476Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-12-30T20:26:58.088680972Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-12-30T20:26:58.088688258Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-12-30T20:26:58.089237933Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/12/30 20:26:58 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/12/30 20:26:58 update was successful 2025-12-30T20:36:49.145019622Z INFO selfnoderemediationtemplate-resource default {"name": "snr-template-short-timeout"} 2025-12-30T20:41:21.360410421Z INFO selfnoderemediation-resource validate create {"name": "worker-0-2-f8g7w"} 2025-12-30T20:41:21.464563444Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.464667021Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.464607555Z DEBUG events [remediation] Remediation started by SNR manager {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"3665196"}, "reason": "RemediationStarted"} 2025-12-30T20:41:21.570794436Z INFO selfnoderemediation-resource validate update {"name": "worker-0-2-f8g7w"} 2025-12-30T20:41:21.573244121Z INFO controllers.SelfNodeRemediation finalizer added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.573380702Z DEBUG events [remediation] Remediation process - successful adding finalizer {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"3665201"}, "reason": "AddFinalizer"} 2025-12-30T20:41:21.578745088Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.578758267Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.586596828Z INFO controllers.SelfNodeRemediation NoExecute taint added {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-12-30T20:40:48Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-12-30T20:40:48Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-12-30T20:41:19Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-12-30T20:41:21Z"}]} 2025-12-30T20:41:21.586655204Z INFO controllers.SelfNodeRemediation Marking node as unschedulable {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}, "node name": "worker-0-2"} 2025-12-30T20:41:21.586945232Z DEBUG events [remediation] Remediation process - NoExecute taint added to the unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"687944a6-de97-4df2-9ecb-c35d8a54374e","apiVersion":"v1","resourceVersion":"3665204"}, "reason": "AddNoExecute"} 2025-12-30T20:41:21.597787527Z DEBUG events [remediation] Remediation process - unhealthy node marked as unschedulable {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"687944a6-de97-4df2-9ecb-c35d8a54374e","apiVersion":"v1","resourceVersion":"3665207"}, "reason": "MarkUnschedulable"} 2025-12-30T20:41:21.604444005Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.604463092Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:21.604804731Z INFO controllers.SelfNodeRemediation waiting for unschedulable taint to appear {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}, "node name": "worker-0-2"} 2025-12-30T20:41:22.605455773Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:22.60547849Z INFO controllers.SelfNodeRemediation pre-reboot not completed yet, prepare for rebooting {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:22.605758358Z INFO rebootDurationCalculator No SafeTimeToAssumeNodeRebootedSeconds specified, using calculated minimum safe reboot time {"calculated minimum time in seconds": 120} 2025-12-30T20:41:22.605771313Z INFO controllers.SelfNodeRemediation setting SNR's time to assume node has been rebooted {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}, "node name": "worker-0-2", "time": "2025-12-30 20:43:22.605770886 +0000 UTC m=+999.852795496"} 2025-12-30T20:41:22.605914793Z DEBUG events [remediation] Remediation process - about to update required fencing time on snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"3665209"}, "reason": "UpdateTimeAssumedRebooted"} 2025-12-30T20:41:22.612075171Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:41:22.612096201Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}, "node name": "worker-0-2", "time left": "2m0.387904839s"} 2025-12-30T20:42:21.03085419Z INFO selfnoderemediation-resource validate update {"name": "worker-0-2-f8g7w"} 2025-12-30T20:42:21.033821529Z INFO controllers.SelfNodeRemediation NHC added the timed-out annotation, remediation will be stopped {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:42:21.033933423Z DEBUG events [remediation] NHC added the timed-out annotation, remediation will be stopped {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"3666020"}, "reason": "RemediationStopped"} 2025-12-30T20:42:21.039538323Z INFO controllers.SelfNodeRemediation NHC added the timed-out annotation, remediation will be stopped {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:42:21.039681748Z DEBUG events [remediation] NHC added the timed-out annotation, remediation will be stopped {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"3666022"}, "reason": "RemediationStopped"} 2025-12-30T20:43:23.006149969Z INFO controllers.SelfNodeRemediation NHC added the timed-out annotation, remediation will be stopped {"pod": "manager", "selfnoderemediation": {"name":"worker-0-2-f8g7w","namespace":"openshift-workload-availability"}} 2025-12-30T20:43:23.006384119Z DEBUG events [remediation] NHC added the timed-out annotation, remediation will be stopped {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-f8g7w","uid":"4797751d-233c-4314-a4ec-c677f3696daf","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"3666022"}, "reason": "RemediationStopped"} Far Logs: ============================================================================================================================ [root@cert-rhosp-02 ~]# oc logs fence-agents-remediation-controller-manager-74bfb5d6bd-bqlgd 2025-12-30T20:26:42.823895564Z INFO setup Go Version: go1.24.4 (Red Hat 1.24.4-2.el9) X:strictfipsruntime 2025-12-30T20:26:42.824024716Z INFO setup Go OS/Arch: linux/amd64 2025-12-30T20:26:42.824029006Z INFO setup Operator Version: bd73055e 2025-12-30T20:26:42.824031811Z INFO setup Git Commit: bd73055ef2c68bfdc865d2c54179f4448bd454da 2025-12-30T20:26:42.824034462Z INFO setup Build Date: 2025-09-08T09:09:10+00:00 2025-12-30T20:26:42.824061945Z INFO setup HTTP/2 for webhooks disabled 2025-12-30T20:26:42.83393215Z INFO validation out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 34} 2025-12-30T20:26:42.833961724Z INFO setup out-of-service taint is supported on this cluster 2025-12-30T20:26:42.833993389Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation"} 2025-12-30T20:26:42.834026564Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediation", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-12-30T20:26:42.834089265Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediation"} 2025-12-30T20:26:42.834150401Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-12-30T20:26:42.834197541Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-12-30T20:26:42.834216317Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "fence-agents-remediation.medik8s.io/v1alpha1, Kind=FenceAgentsRemediationTemplate", "path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-12-30T20:26:42.834254566Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-fence-agents-remediation-medik8s-io-v1alpha1-fenceagentsremediationtemplate"} 2025-12-30T20:26:42.834278457Z INFO setup starting manager 2025-12-30T20:26:42.834506468Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2025-12-30T20:26:42.834466214Z INFO controller-runtime.metrics Starting metrics server 2025-12-30T20:26:42.834909324Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": ":8080", "secure": false} I1230 20:26:42.834734 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/cb305759.medik8s.io... 2025-12-30T20:26:42.83455536Z INFO controller-runtime.webhook Starting webhook server 2025-12-30T20:26:42.835324396Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-12-30T20:26:42.835391696Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-12-30T20:26:42.835522852Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I1230 20:26:59.494953 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/cb305759.medik8s.io 2025-12-30T20:26:59.495043386Z DEBUG events fence-agents-remediation-controller-manager-74bfb5d6bd-bqlgd_debcc5dd-c05d-46dd-bcb6-8040fc3f9456 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"cb305759.medik8s.io","uid":"e609d66a-d089-4051-8bed-58a90e97fb4a","apiVersion":"coordination.k8s.io/v1","resourceVersion":"3658501"}, "reason": "LeaderElection"} 2025-12-30T20:26:59.495552215Z INFO Starting EventSource {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "source": "kind source: *v1alpha1.FenceAgentsRemediation"} 2025-12-30T20:26:59.495571032Z INFO Starting Controller {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation"} 2025-12-30T20:26:59.596821262Z INFO Starting workers {"controller": "fenceagentsremediation", "controllerGroup": "fence-agents-remediation.medik8s.io", "controllerKind": "FenceAgentsRemediation", "worker count": 1} 2025-12-30T20:36:49.127996675Z INFO fenceagentsremediationtemplate-resource default {"name": "far-template-short-timeout"} 2025-12-30T20:41:19.458250392Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-12-30T20:41:19.458335533Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-12-30T20:41:19.566960356Z INFO fenceagentsremediation-resource validate update {"name": "worker-0-2-mb2sz"} 2025-12-30T20:41:19.570540488Z INFO controllers.FenceAgentsRemediation Finalizer was added {"CR Name": "worker-0-2-mb2sz"} 2025-12-30T20:41:19.570577035Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "True", "fenceAgentActionSucceededConditionStatus": "Unknown", "succeededConditionStatus": "Unknown", "reason": "RemediationStarted", "LastUpdateTime": "2025-12-30T20:41:19.570575395Z"} 2025-12-30T20:41:19.57063337Z DEBUG events [remediation] Remediation started {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"3664922"}, "reason": "RemediationStarted"} 2025-12-30T20:41:19.570690214Z DEBUG events [remediation] Finalizer was added {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"3664922"}, "reason": "AddFinalizer"} 2025-12-30T20:41:19.779303481Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-12-30T20:41:19.779450517Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-12-30T20:41:19.779465807Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-12-30T20:41:19.788466051Z INFO taints Taint was added {"taint effect": "NoExecute", "taint list": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-12-30T20:40:48Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-12-30T20:40:48Z"},{"key":"medik8s.io/fence-agents-remediation","effect":"NoExecute","timeAdded":"2025-12-30T20:41:19Z"}]} 2025-12-30T20:41:19.788595413Z INFO controllers.FenceAgentsRemediation FAR remediation taint was added {"Node Name": "worker-0-2"} 2025-12-30T20:41:19.788641747Z INFO controllers.FenceAgentsRemediation Build fence agent command line {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2"} 2025-12-30T20:41:19.788676013Z DEBUG events [remediation] Remediation taint was added {"type": "Normal", "object": {"kind":"Node","name":"worker-0-2","uid":"687944a6-de97-4df2-9ecb-c35d8a54374e","apiVersion":"v1","resourceVersion":"3664032"}, "reason": "AddRemediationTaint"} 2025-12-30T20:41:19.989524527Z INFO controllers.FenceAgentsRemediation Execute the fence agent {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067", "Parameters": ["--lanplus","--password","--username","--action","--ip","--ipport"]} 2025-12-30T20:41:19.989706686Z INFO executer fence agent start {"uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067", "fence_agent": "fence_ipmilan", "retryCount": 1, "retryInterval": "1s", "timeout": "1s"} 2025-12-30T20:41:19.989785103Z DEBUG events [remediation] Fence agent was executed {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"3664924"}, "reason": "FenceAgentExecuted"} 2025-12-30T20:41:20.01046929Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-12-30T20:41:20.010547782Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-12-30T20:41:20.010560455Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-12-30T20:41:20.010624997Z INFO controllers.FenceAgentsRemediation A Fence Agent is already running {"Fence Agent": "fence_ipmilan", "Node Name": "worker-0-2", "FAR uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067"} 2025-12-30T20:41:20.018517367Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-12-30T20:41:20.990836957Z INFO executer command failed {"uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067", "response": "", "errMessage": "", "err": "signal: killed"} 2025-12-30T20:41:20.990901682Z INFO executer fence agent done {"uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067", "fence_agent": "fence_ipmilan", "stdout": "", "stderr": "", "err": "signal: killed"} 2025-12-30T20:41:20.990912578Z INFO executer fence agent context timed out 2025-12-30T20:41:20.99091506Z INFO executer updating status {"FAR uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067"} 2025-12-30T20:41:20.990964811Z INFO executer Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "False", "succeededConditionStatus": "False", "reason": "FenceAgentFailed", "LastUpdateTime": "2025-12-30T20:41:20.990963819Z"} 2025-12-30T20:41:20.997575091Z INFO executer status updated {"FAR uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067"} 2025-12-30T20:41:20.998237808Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-12-30T20:41:20.998260934Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-12-30T20:41:21.003615989Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile 2025-12-30T20:41:21.033349792Z INFO controllers.FenceAgentsRemediation Begin FenceAgentsRemediation Reconcile 2025-12-30T20:41:21.033382472Z INFO controllers.FenceAgentsRemediation Check FAR CR's name 2025-12-30T20:41:21.033412745Z INFO executer cancelling fence agent routine {"uid": "96b52e5c-0cc8-4bbb-a425-7a20717b5067"} 2025-12-30T20:41:21.033419962Z INFO controllers.FenceAgentsRemediation Remediation was stopped by the Node Healthcheck Operator 2025-12-30T20:41:21.033426181Z INFO controllers.FenceAgentsRemediation Updating Status Condition {"processingConditionStatus": "False", "fenceAgentActionSucceededConditionStatus": "False", "succeededConditionStatus": "False", "reason": "RemediationInterruptedByNHC", "LastUpdateTime": "2025-12-30T20:41:20Z"} 2025-12-30T20:41:21.033654054Z DEBUG events [remediation] NHC added the timed-out annotation, remediation will be stopped {"type": "Normal", "object": {"kind":"FenceAgentsRemediation","namespace":"openshift-workload-availability","name":"worker-0-2-mb2sz","uid":"96b52e5c-0cc8-4bbb-a425-7a20717b5067","apiVersion":"fence-agents-remediation.medik8s.io/v1alpha1","resourceVersion":"3665184"}, "reason": "RemediationStopped"} 2025-12-30T20:41:21.038708709Z INFO controllers.FenceAgentsRemediation Finish FenceAgentsRemediation Reconcile