[kni@cert-rhosp-02 ~]$ oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.21.0-0.nightly-2026-01-22-192129 True False 6d5h Error while reconciling 4.21.0-0.nightly-2026-01-22-192129: machine-config, network has an unknown error: ClusterOperatorsDegraded [kni@cert-rhosp-02 ~]$ oc project Using project "openshift-workload-availability" on server "https://api.ocp3m2w-ic4n21.qe.lab.redhat.com:6443". [kni@cert-rhosp-02 ~]$ oc get nodes NAME STATUS ROLES AGE VERSION master-0 Ready control-plane,master 6d6h v1.34.2 master-1 Ready control-plane,master 6d6h v1.34.2 master-2 Ready control-plane,master 6d6h v1.34.2 worker-0 Ready worker 6d5h v1.34.2 worker-1 Ready worker 6d5h v1.34.2 [kni@cert-rhosp-02 ~]$ oc get csv NAME DISPLAY VERSION REPLACES PHASE node-healthcheck-operator.v0.11.0 Node Health Check Operator 0.11.0 node-healthcheck-operator.v0.10.1 Succeeded self-node-remediation.v0.12.0 Self Node Remediation Operator 0.12.0 self-node-remediation.v0.11.0 Succeeded [kni@cert-rhosp-02 ~]$ cat test.yaml apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-worker-test spec: maxUnhealthy: 1 healthyDelay: 5s stormCooldownDuration: 60s remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready --- apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-workload-availability name: selfnoderemediationtemplate-sample spec: template: spec: remediationStrategy: Automatic [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml nodehealthcheck.remediation.medik8s.io/nhc-worker-test created 
selfnoderemediationtemplate.self-node-remediation.medik8s.io/selfnoderemediationtemplate-sample created [kni@cert-rhosp-02 ~]$ oc get nhc NAME AGE nhc-worker-test 9m36s [kni@cert-rhosp-02 ~]$ oc get snr No resources found in openshift-workload-availability namespace. [kni@cert-rhosp-02 ~]$ oc get snrt NAME AGE self-node-remediation-automatic-strategy-template 2d23h selfnoderemediationtemplate-sample 9m43s [kni@cert-rhosp-02 ~]$ oc get nhc -oyaml apiVersion: v1 items: - apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","metadata":{"annotations":{},"name":"nhc-worker-test"},"spec":{"healthyDelay":"5s","maxUnhealthy":1,"remediationTemplate":{"apiVersion":"self-node-remediation.medik8s.io/v1alpha1","kind":"SelfNodeRemediationTemplate","name":"selfnoderemediationtemplate-sample","namespace":"openshift-workload-availability"},"selector":{"matchExpressions":[{"key":"node-role.kubernetes.io/worker","operator":"Exists"}]},"stormCooldownDuration":"60s","unhealthyConditions":[{"duration":"30s","status":"False","type":"Ready"},{"duration":"30s","status":"Unknown","type":"Ready"}]}} creationTimestamp: "2026-02-02T07:03:40Z" generation: 2 name: nhc-worker-test resourceVersion: "4300561" uid: f92dbd24-256c-4aa8-a5d4-dbe2298c21d2 spec: healthyDelay: 5s maxUnhealthy: 1 remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists stormCooldownDuration: 60s unhealthyConditions: - duration: 30s status: "False" type: Ready - duration: 30s status: Unknown type: Ready status: conditions: - lastTransitionTime: "2026-02-02T07:03:56Z" message: No issues found, NodeHealthCheck is enabled. 
reason: NodeHealthCheckEnabled status: "False" type: Disabled - lastTransitionTime: "2026-02-02T07:13:04Z" message: Cooldown cleared - storm threshold triggered reason: HealthyNodeThresholdChange status: "False" type: StormCooldownActive - lastTransitionTime: "2026-02-02T07:13:04Z" message: Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired reason: HealthyNodeThresholdChange status: "True" type: StormActive healthyNodes: 0 lastUpdateTime: "2026-02-02T07:13:19Z" observedNodes: 2 phase: Enabled reason: NHC is enabled, no ongoing remediation unhealthyNodes: - name: worker-1 - name: worker-0 kind: List metadata: resourceVersion: "" selfLink: "" [kni@cert-rhosp-02 ~]$ cat test.yaml apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: name: nhc-worker-test spec: maxUnhealthy: 2 healthyDelay: 5s stormCooldownDuration: 60s remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - duration: 30s status: 'False' type: Ready - duration: 30s status: Unknown type: Ready --- apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: namespace: openshift-workload-availability name: selfnoderemediationtemplate-sample spec: template: spec: remediationStrategy: Automatic [kni@cert-rhosp-02 ~]$ oc apply -f test.yaml nodehealthcheck.remediation.medik8s.io/nhc-worker-test configured selfnoderemediationtemplate.self-node-remediation.medik8s.io/selfnoderemediationtemplate-sample unchanged [kni@cert-rhosp-02 ~]$ oc get nodes NAME STATUS ROLES AGE VERSION master-0 Ready control-plane,master 6d7h v1.34.2 master-1 Ready control-plane,master 6d7h v1.34.2 master-2 Ready control-plane,master 6d7h v1.34.2 worker-0 NotReady worker 6d5h 
v1.34.2 worker-1 NotReady worker 6d5h v1.34.2 [kni@cert-rhosp-02 ~]$ oc get nhc -oyaml apiVersion: v1 items: - apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","metadata":{"annotations":{},"name":"nhc-worker-test"},"spec":{"healthyDelay":"5s","maxUnhealthy":2,"remediationTemplate":{"apiVersion":"self-node-remediation.medik8s.io/v1alpha1","kind":"SelfNodeRemediationTemplate","name":"selfnoderemediationtemplate-sample","namespace":"openshift-workload-availability"},"selector":{"matchExpressions":[{"key":"node-role.kubernetes.io/worker","operator":"Exists"}]},"stormCooldownDuration":"60s","unhealthyConditions":[{"duration":"30s","status":"False","type":"Ready"},{"duration":"30s","status":"Unknown","type":"Ready"}]}} creationTimestamp: "2026-02-02T07:03:40Z" generation: 3 name: nhc-worker-test resourceVersion: "4312118" uid: f92dbd24-256c-4aa8-a5d4-dbe2298c21d2 spec: healthyDelay: 5s maxUnhealthy: 2 remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists stormCooldownDuration: 60s unhealthyConditions: - duration: 30s status: "False" type: Ready - duration: 30s status: Unknown type: Ready status: conditions: - lastTransitionTime: "2026-02-02T07:03:56Z" message: No issues found, NodeHealthCheck is enabled. 
reason: NodeHealthCheckEnabled status: "False" type: Disabled - lastTransitionTime: "2026-02-02T07:33:51Z" message: Storm cooldown delay started - waiting before resuming normal remediation reason: HealthyNodeThresholdChange status: "True" type: StormCooldownActive - lastTransitionTime: "2026-02-02T07:13:04Z" message: Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired reason: HealthyNodeThresholdChange status: "True" type: StormActive healthyNodes: 1 lastUpdateTime: "2026-02-02T07:33:51Z" observedNodes: 2 phase: Enabled reason: NHC is enabled, no ongoing remediation unhealthyNodes: - name: worker-1 kind: List metadata: resourceVersion: "" selfLink: "" [kni@cert-rhosp-02 ~]$ oc get nhc -oyaml apiVersion: v1 items: - apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: annotations: kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","metadata":{"annotations":{},"name":"nhc-worker-test"},"spec":{"healthyDelay":"5s","maxUnhealthy":2,"remediationTemplate":{"apiVersion":"self-node-remediation.medik8s.io/v1alpha1","kind":"SelfNodeRemediationTemplate","name":"selfnoderemediationtemplate-sample","namespace":"openshift-workload-availability"},"selector":{"matchExpressions":[{"key":"node-role.kubernetes.io/worker","operator":"Exists"}]},"stormCooldownDuration":"60s","unhealthyConditions":[{"duration":"30s","status":"False","type":"Ready"},{"duration":"30s","status":"Unknown","type":"Ready"}]}} creationTimestamp: "2026-02-02T07:03:40Z" generation: 3 name: nhc-worker-test resourceVersion: "4315087" uid: f92dbd24-256c-4aa8-a5d4-dbe2298c21d2 spec: healthyDelay: 5s maxUnhealthy: 2 remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-workload-availability selector: matchExpressions: - key: 
node-role.kubernetes.io/worker operator: Exists stormCooldownDuration: 60s unhealthyConditions: - duration: 30s status: "False" type: Ready - duration: 30s status: Unknown type: Ready status: conditions: - lastTransitionTime: "2026-02-02T07:03:56Z" message: No issues found, NodeHealthCheck is enabled. reason: NodeHealthCheckEnabled status: "False" type: Disabled - lastTransitionTime: "2026-02-02T07:34:52Z" message: Storm cooldown completed reason: HealthyNodeThresholdChange status: "False" type: StormCooldownActive - lastTransitionTime: "2026-02-02T07:34:52Z" message: Storm mode is deactivated, remediation can occur normally reason: HealthyNodeThresholdChange status: "False" type: StormActive healthyNodes: 0 lastUpdateTime: "2026-02-02T07:36:27Z" observedNodes: 2 phase: Remediating reason: NHC is remediating 2 nodes unhealthyNodes: - name: worker-1 remediations: - resource: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation name: worker-1-bpt6g namespace: openshift-workload-availability uid: b11f9261-b3b8-4065-829e-2ea1f13d2b09 started: "2026-02-02T07:34:52Z" templateName: selfnoderemediationtemplate-sample - name: worker-0 remediations: - resource: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediation name: worker-0-gc4nq namespace: openshift-workload-availability uid: cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39 started: "2026-02-02T07:36:27Z" templateName: selfnoderemediationtemplate-sample kind: List metadata: resourceVersion: "" selfLink: "" [kni@cert-rhosp-02 ~]$ oc debug node/worker-1 -- chroot /host bash -c "systemctl stop kubelet" Starting pod/worker-1-debug ... To use host binaries, run `chroot /host` Removing debug pod ... 
NHC Logs: 2026-02-02T07:08:12.057225142Z INFO setup Go Version: go1.25.3 (Red Hat 1.25.3-1.el9_7) X:strictfipsruntime 2026-02-02T07:08:12.05730322Z INFO setup Go OS/Arch: linux/amd64 2026-02-02T07:08:12.05730602Z INFO setup Operator Version: acf7a94 2026-02-02T07:08:12.057307712Z INFO setup Git Commit: acf7a94540e8f527122bbd1dc102a46ca985cb82 2026-02-02T07:08:12.057309555Z INFO setup Build Date: 2026-01-28T09:36:53+00:00 2026-02-02T07:08:12.057311571Z INFO setup HTTP/2 for metrics and webhook server disabled 2026-02-02T07:08:12.211209336Z INFO setup supported control plane topology {"topology": "HighlyAvailable"} 2026-02-02T07:08:12.21126191Z INFO setup Cluster capabilities {"IsOnOpenshift": true, "HasMachineAPI": true} 2026-02-02T07:08:12.211646805Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "remediation.medik8s.io/v1alpha1, Kind=NodeHealthCheck", "path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2026-02-02T07:08:12.211780859Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2026-02-02T07:08:12.211839329Z INFO setup starting manager 2026-02-02T07:08:12.211914561Z INFO controller-runtime.metrics Starting metrics server 2026-02-02T07:08:12.211982515Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2026-02-02T07:08:12.212044494Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2026-02-02T07:08:12.212105678Z INFO controller-runtime.webhook Starting webhook server 2026-02-02T07:08:12.212569391Z INFO controller-runtime.certwatcher Updated current TLS certificate 2026-02-02T07:08:12.212660106Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2026-02-02T07:08:12.212786151Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} I0202 07:08:12.313250 1 leaderelection.go:257] attempting to acquire leader lease 
openshift-workload-availability/e1f13584.medik8s.io... 2026-02-02T07:10:34.789949346Z INFO nodehealthcheck-resource validate create {"name": "nhc-worker-test"} 2026-02-02T07:10:57.657247617Z INFO nodehealthcheck-resource validate update {"name": "nhc-worker-test"} I0202 07:27:20.704891 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/e1f13584.medik8s.io 2026-02-02T07:27:20.704950444Z DEBUG events node-healthcheck-controller-manager-cc94d66c-dlhf6_7ffaf5e0-fc6b-4b89-962a-5ad6a89c76e7 became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"e1f13584.medik8s.io","uid":"2745936a-9873-4d3e-ac88-9210082439c7","apiVersion":"coordination.k8s.io/v1","resourceVersion":"4308578"}, "reason": "LeaderElection"} 2026-02-02T07:27:20.705223968Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.MachineHealthCheck"} 2026-02-02T07:27:20.705250472Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1.Node"} 2026-02-02T07:27:20.705261603Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "channel source: 0xc00078aaf0"} 2026-02-02T07:27:20.705339603Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.Machine"} I0202 07:27:20.705974 1 shared_informer.go:350] "Waiting for caches to sync" controller="feature gate accessor" 2026-02-02T07:27:20.706103752Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "channel source: 
0xc00078a8c0"} 2026-02-02T07:27:20.706168931Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1alpha1.NodeHealthCheck"} 2026-02-02T07:27:20.706438342Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1.Node"} 2026-02-02T07:27:20.805237585Z INFO MHCChecker found termination handler MHC, will ignore Nodes with Terminating condition 2026-02-02T07:27:20.805276645Z INFO MHCChecker MHC Checker status changed, notifying NHC controller 2026-02-02T07:27:20.805329001Z INFO adding all NHCs to reconcile queue for handling MHC event 2026-02-02T07:27:20.80548949Z INFO Starting Controller {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck"} 2026-02-02T07:27:20.80549814Z INFO Starting workers {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "worker count": 1} I0202 07:27:20.806097 1 shared_informer.go:357] "Caches are synced" controller="feature gate accessor" I0202 07:27:20.806119 1 simple_featuregate_reader.go:171] Starting feature-gate-detector I0202 07:27:20.806273 1 recorder_logging.go:49] &Event{ObjectMeta:{dummy.18905d4d74922670.ec1e899a dummy 0 0001-01-01 00:00:00 +0000 UTC map[] map[] [] [] []},InvolvedObject:ObjectReference{Kind:Pod,Namespace:dummy,Name:dummy,UID:,APIVersion:v1,ResourceVersion:,FieldPath:,},Reason:FeatureGatesInitialized,Message:FeatureGates updated to featuregates.Features{Enabled:[]v1.FeatureGateName{"AdditionalRoutingCapabilities", "AdminNetworkPolicy", "AlibabaPlatform", "AzureWorkloadIdentity", "BuildCSIVolumes", "CPMSMachineNamePrefix", "ConsolePluginContentSecurityPolicy", "ExternalOIDC", "ExternalOIDCWithUIDAndExtraClaimMappings", "GCPClusterHostedDNSInstall", "GatewayAPI", 
"GatewayAPIController", "HighlyAvailableArbiter", "ImageStreamImportMode", "ImageVolume", "KMSv1", "MachineConfigNodes", "ManagedBootImages", "ManagedBootImagesAWS", "ManagedBootImagesAzure", "ManagedBootImagesvSphere", "MetricsCollectionProfiles", "NetworkDiagnosticsConfig", "NetworkLiveMigration", "NetworkSegmentation", "NewOLM", "NewOLMWebhookProviderOpenshiftServiceCA", "PinnedImages", "PreconfiguredUDNAddresses", "ProcMountType", "RouteAdvertisements", "RouteExternalCertificate", "ServiceAccountTokenNodeBinding", "SigstoreImageVerification", "SigstoreImageVerificationPKI", "StoragePerformantSecurityPolicy", "UpgradeStatus", "UserNamespacesPodSecurityStandards", "UserNamespacesSupport", "VSphereMultiDisk", "VSphereMultiNetworks", "VolumeAttributesClass"}, Disabled:[]v1.FeatureGateName{"AWSClusterHostedDNS", "AWSClusterHostedDNSInstall", "AWSDedicatedHosts", "AWSDualStackInstall", "AWSServiceLBNetworkSecurityGroup", "AutomatedEtcdBackup", "AzureClusterHostedDNSInstall", "AzureDedicatedHosts", "AzureDualStackInstall", "AzureMultiDisk", "BootImageSkewEnforcement", "BootcNodeManagement", "CBORServingAndStorage", "CRDCompatibilityRequirementOperator", "ClientsAllowCBOR", "ClientsPreferCBOR", "ClusterAPIInstall", "ClusterAPIInstallIBMCloud", "ClusterAPIMachineManagement", "ClusterAPIMachineManagementVSphere", "ClusterMonitoringConfig", "ClusterVersionOperatorConfiguration", "DNSNameResolver", "DualReplica", "DyanmicServiceEndpointIBMCloud", "EtcdBackendQuota", "EventTTL", "EventedPLEG", "Example", "Example2", "ExternalSnapshotMetadata", "GCPClusterHostedDNS", "GCPCustomAPIEndpoints", "GCPCustomAPIEndpointsInstall", "GCPDualStackInstall", "HyperShiftOnlyDynamicResourceAllocation", "ImageModeStatusReporting", "IngressControllerDynamicConfigurationManager", "InsightsConfig", "InsightsOnDemandDataGather", "IrreconcilableMachineConfig", "KMSEncryptionProvider", "MachineAPIMigration", "MachineAPIOperatorDisableMachineHealthCheckController", "ManagedBootImagesCPMS", 
"MaxUnavailableStatefulSet", "MinimumKubeletVersion", "MixedCPUsAllocation", "MultiArchInstallAzure", "MultiDiskSetup", "MutableCSINodeAllocatableCount", "MutatingAdmissionPolicy", "NewOLMBoxCutterRuntime", "NewOLMCatalogdAPIV1Metas", "NewOLMOwnSingleNamespace", "NewOLMPreflightPermissionChecks", "NoRegistryClusterInstall", "NutanixMultiSubnets", "OSStreams", "OVNObservability", "OnPremDNSRecords", "OpenShiftPodSecurityAdmission", "ProvisioningRequestAvailable", "SELinuxMount", "ShortCertRotation", "SignatureStores", "TranslateStreamCloseWebsocketRequests", "VSphereConfigurableMaxAllowedBlockVolumesPerNode", "VSphereHostVMGroupZonal", "VSphereMixedNodeEnv", "VolumeGroupSnapshot"}},Source:EventSource{Component:,Host:,},FirstTimestamp:2026-02-02 07:27:20.80620504 +0000 UTC m=+1148.769435523,LastTimestamp:2026-02-02 07:27:20.80620504 +0000 UTC m=+1148.769435523,Count:1,Type:Normal,EventTime:0001-01-01 00:00:00 +0000 UTC,Series:nil,Action:,Related:nil,ReportingController:,ReportingInstance:,} 2026-02-02T07:27:20.806280736Z INFO FeatureGateAccessor FeatureGates initialized 2026-02-02T07:27:20.90777658Z INFO Starting Controller {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck"} 2026-02-02T07:27:20.907840051Z INFO Starting workers {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "worker count": 1} 2026-02-02T07:27:20.907901139Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:27:20.911613786Z ERROR controllers.NodeHealthCheck failed to validate template {"NodeHealthCheck name": "nhc-worker-test", "error": "failed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample: selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User 
\"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorVerbose": "selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"\nfailed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplateWithFallbackNamespace\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:98\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplate\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:80\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ValidateTemplates\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:120\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:200\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/co
ntroller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:201 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202 2026-02-02T07:27:20.912094797Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":0,"healthyNodes":0,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"True","lastTransitionTime":"2026-02-02T07:16:51Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal 
remediation"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:16:56Z"}, "patch": "{\"status\":{\"healthyNodes\":0,\"observedNodes\":0}}"} 2026-02-02T07:27:21.118414061Z INFO console-plugin successfully created / updated console plugin resources 2026-02-02T07:27:21.119390021Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": "failed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample: selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorCauses": [{"error": "failed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample: selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorVerbose": "selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group 
\"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"\nfailed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplateWithFallbackNamespace\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:98\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplate\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:80\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ValidateTemplates\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:120\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:200\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"}], "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:27:21.119508976Z ERROR Reconciler error {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", 
"NodeHealthCheck": {"name":"nhc-worker-test"}, "namespace": "", "name": "nhc-worker-test", "reconcileID": "e3b77331-b0b8-4fd5-8930-a9c15736d880", "error": "failed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample: selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorCauses": [{"error": "failed to get external remediation template openshift-workload-availability/selfnoderemediationtemplate-sample: selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"", "errorVerbose": "selfnoderemediationtemplates.self-node-remediation.medik8s.io \"selfnoderemediationtemplate-sample\" is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot get resource \"selfnoderemediationtemplates\" in API group \"self-node-remediation.medik8s.io\" in the namespace \"openshift-workload-availability\"\nfailed to get external remediation template 
openshift-workload-availability/selfnoderemediationtemplate-sample\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplateWithFallbackNamespace\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:98\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).getTemplate\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:80\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ValidateTemplates\n\t/app/node-healthcheck-operator/controllers/resources/templates.go:120\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:200\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"}]} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:353 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem 
/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202 2026-02-02T07:27:21.125327787Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:27:21.129667229Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2026-02-02T07:27:21.129734726Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "SelfNodeRemediationTemplate"} 2026-02-02T07:27:21.129753784Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2026-02-02T07:27:21.129775163Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "SelfNodeRemediation"} 2026-02-02T07:27:21.133732077Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:27:21Z" level=info msg="invalidating lease" time="2026-02-02T07:27:21Z" level=info msg="getting lease" 2026-02-02T07:27:21.235614944Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T07:27:21Z" level=info msg="invalidating lease" time="2026-02-02T07:27:21Z" level=info msg="getting lease" 2026-02-02T07:27:21.238597597Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": 
{"observedNodes":2,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"True","lastTransitionTime":"2026-02-02T07:16:51Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal remediation"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:27:20Z"}, "patch": "{\"status\":{\"healthyNodes\":2,\"observedNodes\":2}}"} 2026-02-02T07:27:21.445858756Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:28:34.490147405Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:28:34.490205242Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:28:34.490247247Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:28:34.497693254Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.502310548s"} 2026-02-02T07:28:34.508321613Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:28:34Z" level=info msg="invalidating lease" time="2026-02-02T07:28:34Z" level=info msg="getting lease" 2026-02-02T07:28:34.51291718Z INFO controllers.NodeHealthCheck Patching NHC status 
{"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":1,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"True","lastTransitionTime":"2026-02-02T07:16:51Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal remediation"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:27:21Z"}, "patch": "{\"status\":{\"healthyNodes\":1}}"} 2026-02-02T07:28:34.722676053Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "30.502310548s"} 2026-02-02T07:28:39.97786607Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:28:39.977950147Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:28:39.978005785Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:28:39.986991123Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown", "duration left": "30.013011717s"} 2026-02-02T07:28:39.987035833Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown", "duration left": "24.012964652s"} 2026-02-02T07:28:40.008511241Z INFO controllers.NodeHealthCheck Patching NHC status 
{"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"True","lastTransitionTime":"2026-02-02T07:16:51Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal remediation"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:28:34Z"}, "patch": "{\"status\":{\"healthyNodes\":0}}"} 2026-02-02T07:28:40.217698751Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "25.012964652s"} 2026-02-02T07:29:05.225554956Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:29:05.229555413Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown", "duration left": "4.770447103s"} 2026-02-02T07:29:05.229596565Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:29:05.22974954Z DEBUG events [remediation] Node matches unhealthy condition. 
Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309283"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:29:05.234201557Z INFO controllers.NodeHealthCheck Skipped remediation because the number of healthy nodes selected by the selector is 0 and should equal or exceed 1 {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:29:05.234323167Z DEBUG events [remediation] Skipped remediation because the number of healthy nodes selected by the selector is 0 and should equal or exceed 1 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309283"}, "reason": "RemediationSkipped"} 2026-02-02T07:29:05.234436655Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:29:05Z","reason":"HealthyNodeThresholdChange","message":"Cooldown cleared - storm threshold triggered"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:28:40Z"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2026-02-02T07:03:56Z\",\"message\":\"No issues found, 
NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"},{\"lastTransitionTime\":\"2026-02-02T07:29:05Z\",\"message\":\"Cooldown cleared - storm threshold triggered\",\"reason\":\"HealthyNodeThresholdChange\",\"status\":\"False\",\"type\":\"StormCooldownActive\"},{\"lastTransitionTime\":\"2026-02-02T07:13:04Z\",\"message\":\"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired\",\"reason\":\"HealthyNodeThresholdChange\",\"status\":\"True\",\"type\":\"StormActive\"}],\"unhealthyNodes\":[{\"name\":\"worker-1\"}]}}"} 2026-02-02T07:29:05.44017795Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "5.770447103s"} 2026-02-02T07:29:11.211460121Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:29:11.215403527Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:29:11.215459693Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:29:11.215580278Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-0", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309967"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:29:11.215614583Z DEBUG events [remediation] Node matches unhealthy condition. 
Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309967"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:29:11.21961558Z INFO controllers.NodeHealthCheck Skipped remediation because the number of healthy nodes selected by the selector is 0 and should equal or exceed 1 {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:29:11.21974233Z DEBUG events [remediation] Skipped remediation because the number of healthy nodes selected by the selector is 0 and should equal or exceed 1 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309967"}, "reason": "RemediationSkipped"} 2026-02-02T07:29:11.219830299Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1"},{"name":"worker-0"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:29:05Z","reason":"HealthyNodeThresholdChange","message":"Cooldown cleared - storm threshold triggered"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:29:05Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"name\":\"worker-1\"},{\"name\":\"worker-0\"}]}}"} 
2026-02-02T07:29:11.426002125Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:33:51.074848905Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:33:51.074958696Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:33:51.075022205Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:33:51.0803388Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "False", "duration left": "34.919662536s"} 2026-02-02T07:33:51.080390758Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:33:51.080501442Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309988"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:33:51.08732442Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. 
{"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:33:51.087380046Z INFO controllers.NodeHealthCheck Skipped remediation because the number of healthy nodes selected by the selector is 0 and should equal or exceed 1 {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:33:51.087484299Z DEBUG events [remediation] Skipped remediation because the number of healthy nodes selected by the selector is 0 and should equal or exceed 1 {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4309988"}, "reason": "RemediationSkipped"} 2026-02-02T07:33:51.08776461Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "35.919662536s"} 2026-02-02T07:33:51.377244154Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:33:51.3817239Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "False", "duration left": "34.61827857s"} 2026-02-02T07:33:51.381765724Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:33:51.38187972Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312019"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:33:51.386345624Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. 
{"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:33:51.386382913Z INFO controllers.NodeHealthCheck The cluster regained health after the storm, a cooldown period now begins as a safety measure before normal operations resume. {"nhc": "nhc-worker-test"} 2026-02-02T07:33:51.386432534Z INFO controllers.NodeHealthCheck Storm recovery active: skipping creation of new remediations 2026-02-02T07:33:51.386527844Z DEBUG events [remediation] Storm recovery mode will exit after cooldown delay {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312019"}, "reason": "StormRecoveryCooldownStarted"} 2026-02-02T07:33:51.386582711Z DEBUG events [remediation] Storm recovery active: skipping creation of new remediations {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312019"}, "reason": "RemediationSkipped"} 2026-02-02T07:33:51.38673902Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1"},{"name":"worker-0"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"True","lastTransitionTime":"2026-02-02T07:33:51Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal remediation"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration 
expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:29:11Z"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2026-02-02T07:03:56Z\",\"message\":\"No issues found, NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"},{\"lastTransitionTime\":\"2026-02-02T07:33:51Z\",\"message\":\"Storm cooldown delay started - waiting before resuming normal remediation\",\"reason\":\"HealthyNodeThresholdChange\",\"status\":\"True\",\"type\":\"StormCooldownActive\"},{\"lastTransitionTime\":\"2026-02-02T07:13:04Z\",\"message\":\"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired\",\"reason\":\"HealthyNodeThresholdChange\",\"status\":\"True\",\"type\":\"StormActive\"}]}}"} 2026-02-02T07:33:51.501165341Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:33:51.50123153Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:33:51.593104209Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "35.61827857s"} 2026-02-02T07:33:51.593215852Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:33:51.598914774Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:33:51.599161757Z DEBUG events [remediation] Node matches unhealthy condition. 
Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312022"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:33:51.606001817Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:33:51Z" level=info msg="invalidating lease" time="2026-02-02T07:33:51Z" level=info msg="getting lease" 2026-02-02T07:33:51.706970512Z INFO controllers.NodeHealthCheck Storm recovery active: skipping creation of new remediations 2026-02-02T07:33:51.707106564Z DEBUG events [remediation] Storm recovery active: skipping creation of new remediations {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312022"}, "reason": "RemediationSkipped"} 2026-02-02T07:33:51.707226863Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-1"}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"True","lastTransitionTime":"2026-02-02T07:33:51Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal remediation"},{"type":"StormActive","status":"True","lastTransitionTime":"2026-02-02T07:13:04Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is activated - preventing any new remediation until the storm is over and cooldown duration expired"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing 
remediation","lastUpdateTime":"2026-02-02T07:33:51Z"}, "patch": "{\"status\":{\"healthyNodes\":1,\"unhealthyNodes\":[{\"name\":\"worker-1\"}]}}"} 2026-02-02T07:33:51.913660426Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "1m0.293031256s"} 2026-02-02T07:34:27.008070426Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:34:27.012890747Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:34:27.013043182Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312118"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:34:27.02335314Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:34:27Z" level=info msg="invalidating lease" time="2026-02-02T07:34:27Z" level=info msg="getting lease" 2026-02-02T07:34:27.121127785Z INFO controllers.NodeHealthCheck Storm recovery active: skipping creation of new remediations 2026-02-02T07:34:27.121251804Z DEBUG events [remediation] Storm recovery active: skipping creation of new remediations {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312118"}, "reason": "RemediationSkipped"} 2026-02-02T07:34:27.121406044Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "24.878874197s"} 2026-02-02T07:34:52.001123081Z INFO 
controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:34:52.005641241Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:34:52.005836382Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312118"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:34:52.010344915Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:34:52Z" level=info msg="invalidating lease" time="2026-02-02T07:34:52Z" level=info msg="getting lease" 2026-02-02T07:34:52.111844549Z INFO controllers.NodeHealthCheck Storm recovery mode deactivated {"nhc": "nhc-worker-test"} 2026-02-02T07:34:52.1118936Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:34:52.111916943Z DEBUG events [remediation] Storm recovery mode deactivated - normal remediation resumed {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312118"}, "reason": "StormRecoveryEnded"} 2026-02-02T07:34:52.211535153Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-1"} time="2026-02-02T07:34:52Z" level=info msg="request lease" time="2026-02-02T07:34:52Z" level=info msg="getting lease" time="2026-02-02T07:34:52Z" level=info msg="create lease" 2026-02-02T07:34:52.215161004Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR 
kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2026-02-02T07:34:52.222945528Z DEBUG events [remediation] Created remediation object for node worker-1 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4312118"}, "reason": "RemediationCreated"} 2026-02-02T07:34:52.312164525Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-bpt6g","uid":"b11f9261-b3b8-4065-829e-2ea1f13d2b09","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:34:52Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2026-02-02T07:33:51Z"}, "patch": "{\"status\":{\"conditions\":[{\"lastTransitionTime\":\"2026-02-02T07:03:56Z\",\"message\":\"No issues found, NodeHealthCheck is enabled.\",\"reason\":\"NodeHealthCheckEnabled\",\"status\":\"False\",\"type\":\"Disabled\"},{\"lastTransitionTime\":\"2026-02-02T07:34:52Z\",\"message\":\"Storm cooldown 
completed\",\"reason\":\"HealthyNodeThresholdChange\",\"status\":\"False\",\"type\":\"StormCooldownActive\"},{\"lastTransitionTime\":\"2026-02-02T07:34:52Z\",\"message\":\"Storm mode is deactivated, remediation can occur normally\",\"reason\":\"HealthyNodeThresholdChange\",\"status\":\"False\",\"type\":\"StormActive\"}],\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-bpt6g\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b11f9261-b3b8-4065-829e-2ea1f13d2b09\"},\"started\":\"2026-02-02T07:34:52Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T07:34:52.433617746Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:34:52.433672792Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:34:52.433683055Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:34:52.433687068Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:34:52.438245752Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": 
{"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:34:52.438284233Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:34:52.438295204Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:34:52.438299413Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:34:52.458528326Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:34:52.458571345Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:34:52.458596755Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:34:52.458603301Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:34:52.518586245Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck 
name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T07:34:52.518666971Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:34:52.522662132Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:34:52.526716785Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 2026-02-02T07:34:52.526738884Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:34:52Z" level=info msg="invalidating lease" time="2026-02-02T07:34:52Z" level=info msg="getting lease" 2026-02-02T07:34:52.528503897Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:34:52.532015266Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-1-bpt6g", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:34:52Z" level=info msg="getting lease" 2026-02-02T07:34:52.532082169Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:34:52.534172919Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T07:35:55.841397638Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:35:55.841463728Z INFO adding NHC to reconcile queue for handling 
node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:35:55.841518026Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:35:55.848423931Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:35:55.848456597Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown", "duration left": "30.151544226s"} 2026-02-02T07:35:55.861536584Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 2026-02-02T07:35:55.861571899Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:35:55.962886226Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-1-bpt6g", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:35:55Z" level=info msg="getting lease" 2026-02-02T07:35:55.963061143Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} time="2026-02-02T07:35:55Z" level=info msg="request lease" time="2026-02-02T07:35:55Z" level=info msg="getting lease" time="2026-02-02T07:35:55Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-worker-test setAcquireTime=false" 2026-02-02T07:35:56.064534009Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": 
{"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-bpt6g","uid":"b11f9261-b3b8-4065-829e-2ea1f13d2b09","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:34:52Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2026-02-02T07:34:52Z"}, "patch": "{\"status\":{\"healthyNodes\":0}}"} 2026-02-02T07:35:56.277439881Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "31.151544226s"} 2026-02-02T07:36:27.429572982Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:36:27.433011211Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:36:27.433095311Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:36:27.433225265Z DEBUG events [remediation] Node matches unhealthy condition. 
Node "worker-0", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4314355"}, "reason": "DetectedUnhealthy"} 2026-02-02T07:36:27.437852366Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 2026-02-02T07:36:27.437875256Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:36:27.440989398Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-1-bpt6g", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:36:27Z" level=info msg="getting lease" 2026-02-02T07:36:27.441064359Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:36:27.442734475Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:36:27.445996968Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-0"} time="2026-02-02T07:36:27Z" level=info msg="request lease" time="2026-02-02T07:36:27Z" level=info msg="getting lease" time="2026-02-02T07:36:27Z" level=info msg="create lease" 2026-02-02T07:36:27.450662157Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2026-02-02T07:36:27.454836418Z DEBUG events [remediation] Created 
remediation object for node worker-0 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4314355"}, "reason": "RemediationCreated"} 2026-02-02T07:36:27.456933919Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-bpt6g","uid":"b11f9261-b3b8-4065-829e-2ea1f13d2b09","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:34:52Z","templateName":"selfnoderemediationtemplate-sample"}]},{"name":"worker-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-gc4nq","uid":"cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:36:27Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 2 nodes","lastUpdateTime":"2026-02-02T07:35:56Z"}, "patch": "{\"status\":{\"reason\":\"NHC is remediating 2 
nodes\",\"unhealthyNodes\":[{\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-bpt6g\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b11f9261-b3b8-4065-829e-2ea1f13d2b09\"},\"started\":\"2026-02-02T07:34:52Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]},{\"name\":\"worker-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-gc4nq\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39\"},\"started\":\"2026-02-02T07:36:27Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T07:36:27.662549791Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T07:37:59.552266965Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:37:59.552346381Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:37:59.552399288Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:37:59.557420081Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:37:59.557453439Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "False", "duration left": "35.442547318s"} 2026-02-02T07:37:59.563859763Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 
2026-02-02T07:37:59.563902442Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:37:59.56391012Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:37:59.571475595Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:37:59Z" level=info msg="getting lease" 2026-02-02T07:37:59.571558093Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} time="2026-02-02T07:37:59Z" level=info msg="request lease" time="2026-02-02T07:37:59Z" level=info msg="getting lease" time="2026-02-02T07:37:59Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-worker-test setAcquireTime=false" 2026-02-02T07:37:59.663681722Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "36.442547318s"} 2026-02-02T07:38:00.772684055Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:38:00.772751282Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:38:00.772801243Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:00.777504632Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:00.797986504Z INFO controllers.NodeHealthCheck found condition 
{"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 2026-02-02T07:38:00.798021082Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:00.894920945Z INFO controllers.NodeHealthCheck.resource manager setting a delay for node getting healthy {"node name": "worker-1", "delay in seconds": 5} 2026-02-02T07:38:00.900268882Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:38:00.900318037Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:38:00.900327923Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:38:00.900331637Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:38:00.900537252Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:00.904367784Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:00Z" level=info msg="getting lease" 2026-02-02T07:38:00.904425338Z INFO controllers.NodeHealthCheck.nhc lease 
manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:00.906593854Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-bpt6g","uid":"b11f9261-b3b8-4065-829e-2ea1f13d2b09","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:34:52Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2026-02-02T07:38:00Z","healthyDelayed":true},{"name":"worker-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-gc4nq","uid":"cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:36:27Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 2 nodes","lastUpdateTime":"2026-02-02T07:36:27Z"}, "patch": 
"{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2026-02-02T07:38:00Z\",\"healthyDelayed\":true,\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-bpt6g\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b11f9261-b3b8-4065-829e-2ea1f13d2b09\"},\"started\":\"2026-02-02T07:34:52Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]},{\"name\":\"worker-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-gc4nq\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39\"},\"started\":\"2026-02-02T07:36:27Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T07:38:01.112319599Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "6s"} 2026-02-02T07:38:01.112447001Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:01.122771101Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:01.138745634Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 2026-02-02T07:38:01.138777486Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:01.140795786Z INFO controllers.NodeHealthCheck.resource manager delaying node getting healthy {"node name": "worker-1", "remaining time in seconds": 3.859205849} 2026-02-02T07:38:01.140820362Z INFO controllers.NodeHealthCheck handling unhealthy node 
{"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:01.144993523Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:01Z" level=info msg="getting lease" 2026-02-02T07:38:01.145066635Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:01.236305052Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "4.859205849s"} 2026-02-02T07:38:06.095879909Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:06.102354254Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:06.107135514Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T07:34:57Z"} 2026-02-02T07:38:06.107170184Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:06.207677223Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:38:06.214576288Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-1-bpt6g"} 2026-02-02T07:38:06.214606167Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:06.214790637Z 
DEBUG events [remediation] Deleted remediation CR of kind SelfNodeRemediation with name worker-1-bpt6g {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4315493"}, "reason": "RemediationRemoved"} 2026-02-02T07:38:06.214969728Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:38:06.215015304Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:38:06.215034541Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:38:06.215064149Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:38:06.307924951Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:06Z" level=info msg="getting lease" 2026-02-02T07:38:06.308095172Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:06.310220667Z INFO controllers.NodeHealthCheck reconcile end 
{"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "11s"} 2026-02-02T07:38:06.310295949Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:06.313382905Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:06.316902564Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:06.318625547Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:38:06.320614596Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:06.32401083Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:06Z" level=info msg="getting lease" 2026-02-02T07:38:06.324071635Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:06.325979321Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:38:17.31136136Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:17.315533507Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:17.321408016Z INFO controllers.NodeHealthCheck handling 
healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:17.323585397Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:38:17.325301438Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:17.328565523Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:17Z" level=info msg="getting lease" 2026-02-02T07:38:17.328623759Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:17.333560405Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:38:34.334530279Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:34.338777802Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:34.343237036Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:34.345192269Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:38:34.347618183Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:34.351721586Z INFO 
controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:34Z" level=info msg="getting lease" 2026-02-02T07:38:34.351830131Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:34.353967371Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:38:51.354242487Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:38:51.357611408Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:38:51.361801482Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:38:51.363748093Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:38:51.36580273Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:38:51.369559113Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:38:51Z" level=info msg="getting lease" 2026-02-02T07:38:51.369658151Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC 
is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:38:51.371828187Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:39:08.372439565Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:08.37630178Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:39:08.38036787Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:08.382378891Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:08.384156646Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:08.387648323Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:39:08Z" level=info msg="getting lease" 2026-02-02T07:39:08.387714846Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} time="2026-02-02T07:39:08Z" level=info msg="request lease" time="2026-02-02T07:39:08Z" level=info msg="getting lease" time="2026-02-02T07:39:08Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-worker-test setAcquireTime=false" 2026-02-02T07:39:08.481208671Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 
2026-02-02T07:39:25.482041156Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:25.485473542Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:39:25.490342046Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:25.492659091Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:25.49457432Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:25.497964995Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:39:25Z" level=info msg="getting lease" 2026-02-02T07:39:25.498037074Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:39:25.500205142Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:39:42.501153695Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:42.504940573Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T07:39:42.508767429Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 
2026-02-02T07:39:42.510731562Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:42.512638637Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:42.516332017Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-0-gc4nq", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T07:39:42Z" level=info msg="getting lease" 2026-02-02T07:39:42.516388623Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-0", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T07:39:42.518356206Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:39:46.317144776Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:39:46.317198266Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:39:46.317238847Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:46.322380072Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-0", "condition type": "Ready", "condition status": "False", "duration left": "34.677627463s"} 2026-02-02T07:39:46.329771274Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:46.333351183Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node 
name": "worker-1"} 2026-02-02T07:39:46.338022833Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:46.338329642Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:39:48.749421453Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:39:48.74951274Z INFO adding NHC to reconcile queue for handling node {"node": "worker-0", "NHC": "nhc-worker-test"} 2026-02-02T07:39:48.749577793Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:48.765150491Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:48.864195728Z INFO controllers.NodeHealthCheck.resource manager setting a delay for node getting healthy {"node name": "worker-0", "delay in seconds": 5} 2026-02-02T07:39:48.870651189Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:48.870694406Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:39:48.870749325Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-0-gc4nq", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:39:48.870763735Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": 
{"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:39:48.870768031Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-0-gc4nq", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:39:48.873410762Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:48.876187115Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":0,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-bpt6g","uid":"b11f9261-b3b8-4065-829e-2ea1f13d2b09","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:34:52Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2026-02-02T07:38:00Z","healthyDelayed":true},{"name":"worker-0","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-0-gc4nq","uid":"cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:36:27Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2026-02-02T07:39:48Z","healthyDelayed":true}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown 
completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 2 nodes","lastUpdateTime":"2026-02-02T07:38:00Z"}, "patch": "{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2026-02-02T07:38:00Z\",\"healthyDelayed\":true,\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-bpt6g\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b11f9261-b3b8-4065-829e-2ea1f13d2b09\"},\"started\":\"2026-02-02T07:34:52Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]},{\"conditionsHealthyTimestamp\":\"2026-02-02T07:39:48Z\",\"healthyDelayed\":true,\"name\":\"worker-0\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-0-gc4nq\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"cdc4efd5-cea0-46cc-a6dd-d8f0d2a1bd39\"},\"started\":\"2026-02-02T07:36:27Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T07:39:49.083882186Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "6s"} 2026-02-02T07:39:49.084003065Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:49.092111413Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:49.094013023Z INFO controllers.NodeHealthCheck.resource manager delaying node getting healthy {"node name": "worker-0", "remaining time in seconds": 3.905988884} 2026-02-02T07:39:49.094034954Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": 
"nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:49.19276835Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:49.195389557Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "4.905988884s"} 2026-02-02T07:39:54.10224321Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:54.109656129Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:54.111722237Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:54.113823014Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:39:54.115485774Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-0"} 2026-02-02T07:39:54.121293814Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-0-gc4nq"} 2026-02-02T07:39:54.12134947Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:39:54.121374645Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-0-gc4nq", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:39:54.121380459Z DEBUG events [remediation] Deleted remediation CR of kind SelfNodeRemediation with name worker-0-gc4nq {"type": "Normal", "object": 
{"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4316402"}, "reason": "RemediationRemoved"} 2026-02-02T07:39:54.121523667Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "11s"} 2026-02-02T07:39:54.121580784Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:39:54.210016762Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:39:54Z" level=info msg="invalidating lease" time="2026-02-02T07:39:54Z" level=info msg="getting lease" 2026-02-02T07:39:54.215478164Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:39:54.310665213Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:39:54.313481605Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-bpt6g","uid":"b11f9261-b3b8-4065-829e-2ea1f13d2b09","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T07:34:52Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2026-02-02T07:38:00Z","healthyDelayed":true}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is 
enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2026-02-02T07:39:48Z"}, "patch": "{\"status\":{\"healthyNodes\":1,\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2026-02-02T07:38:00Z\",\"healthyDelayed\":true,\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-bpt6g\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b11f9261-b3b8-4065-829e-2ea1f13d2b09\"},\"started\":\"2026-02-02T07:34:52Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T07:39:54.519593766Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:40:05.122121819Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:40:05.129864296Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:40:05Z" level=info msg="invalidating lease" time="2026-02-02T07:40:05Z" level=info msg="getting lease" 2026-02-02T07:40:05.131692778Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:40:05.133357353Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:40:05.135766722Z INFO 
controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:40:20.454734251Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:40:20.454797598Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:40:20.454810923Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:40:20.454815797Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:40:20.454849497Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:40:20.463373525Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:40:20Z" level=info msg="invalidating lease" time="2026-02-02T07:40:20Z" level=info msg="getting lease" 2026-02-02T07:40:20.473429934Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:40:20.572674096Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T07:40:20.575510357Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", 
"error": null, "requeue": false, "requeuAfter": "17s"} 2026-02-02T07:40:20.59362254Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:40:20.593668051Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:40:20.593683593Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:40:20.593689983Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T07:40:20.593726918Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:40:20.617080973Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 2026-02-02T07:40:20.617258711Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T07:40:20.617293674Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-bpt6g", "Remediation CR Kind": "SelfNodeRemediation"} time="2026-02-02T07:40:20Z" level=info msg="invalidating lease" time="2026-02-02T07:40:20Z" level=info msg="getting lease" 2026-02-02T07:40:20.674181006Z INFO 
controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T07:40:20Z" level=info msg="invalidating lease" time="2026-02-02T07:40:20Z" level=info msg="getting lease" 2026-02-02T07:40:20.777866522Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:39:54Z"}, "patch": "{\"status\":{\"healthyNodes\":2,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"} 2026-02-02T07:40:20.985722862Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:40:20.985839901Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:40:20.997010524Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:40:21Z" level=info msg="invalidating lease" time="2026-02-02T07:40:21Z" level=info msg="getting lease" 2026-02-02T07:40:21.098356884Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T07:40:21Z" level=info msg="invalidating lease" 
time="2026-02-02T07:40:21Z" level=info msg="getting lease" 2026-02-02T07:40:21.102150447Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:40:22.136218309Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:40:22.148395729Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:40:22Z" level=info msg="invalidating lease" time="2026-02-02T07:40:22Z" level=info msg="getting lease" 2026-02-02T07:40:22.150768999Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T07:40:22Z" level=info msg="invalidating lease" time="2026-02-02T07:40:22Z" level=info msg="getting lease" 2026-02-02T07:40:22.24877112Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} When healthyNodes < maxUnhealthy: 2026-02-02T07:27:03.805782541Z INFO setup Go Version: go1.25.3 (Red Hat 1.25.3-1.el9_7) X:strictfipsruntime 2026-02-02T07:27:03.80594231Z INFO setup Go OS/Arch: linux/amd64 2026-02-02T07:27:03.805946578Z INFO setup Operator Version: acf7a94 2026-02-02T07:27:03.805950085Z INFO setup Git Commit: acf7a94540e8f527122bbd1dc102a46ca985cb82 2026-02-02T07:27:03.80595311Z INFO setup Build Date: 2026-01-28T09:36:53+00:00 2026-02-02T07:27:03.805956032Z INFO setup HTTP/2 for metrics and webhook server disabled 2026-02-02T07:27:03.827252389Z INFO setup supported control plane topology {"topology": "HighlyAvailable"} 2026-02-02T07:27:03.827307674Z INFO setup Cluster capabilities {"IsOnOpenshift": true, "HasMachineAPI": true} 2026-02-02T07:27:03.827635678Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "remediation.medik8s.io/v1alpha1, Kind=NodeHealthCheck", 
"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2026-02-02T07:27:03.827725433Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-remediation-medik8s-io-v1alpha1-nodehealthcheck"} 2026-02-02T07:27:03.827778123Z INFO setup starting manager 2026-02-02T07:27:03.827863001Z INFO controller-runtime.metrics Starting metrics server 2026-02-02T07:27:03.827958662Z INFO controller-runtime.metrics Serving metrics server {"bindAddress": "127.0.0.1:8080", "secure": false} 2026-02-02T07:27:03.827992709Z INFO starting server {"name": "health probe", "addr": "[::]:8081"} 2026-02-02T07:27:03.827992816Z INFO controller-runtime.webhook Starting webhook server 2026-02-02T07:27:03.828414533Z INFO controller-runtime.certwatcher Updated current TLS certificate 2026-02-02T07:27:03.828787116Z INFO controller-runtime.certwatcher Starting certificate poll+watcher {"interval": "10s"} 2026-02-02T07:27:03.829243894Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} I0202 07:27:03.929035 1 leaderelection.go:257] attempting to acquire leader lease openshift-workload-availability/e1f13584.medik8s.io... 
2026-02-02T07:33:51.561629414Z INFO nodehealthcheck-resource validate update {"name": "nhc-worker-test"} I0202 07:57:07.848743 1 leaderelection.go:271] successfully acquired lease openshift-workload-availability/e1f13584.medik8s.io 2026-02-02T07:57:07.848787746Z DEBUG events node-healthcheck-controller-manager-cc94d66c-q5wd4_7d249b1e-9838-4d00-8e48-a67aa900631f became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"e1f13584.medik8s.io","uid":"2745936a-9873-4d3e-ac88-9210082439c7","apiVersion":"coordination.k8s.io/v1","resourceVersion":"4326682"}, "reason": "LeaderElection"} 2026-02-02T07:57:07.84900242Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "channel source: 0xc000982000"} 2026-02-02T07:57:07.849027212Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1.Node"} 2026-02-02T07:57:07.849059076Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *v1alpha1.NodeHealthCheck"} 2026-02-02T07:57:07.849148315Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.MachineHealthCheck"} 2026-02-02T07:57:07.849168483Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1.Node"} 2026-02-02T07:57:07.849174707Z INFO Starting EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "kind source: *v1beta1.Machine"} 2026-02-02T07:57:07.849156627Z INFO Starting 
EventSource {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "source": "channel source: 0xc000982230"} I0202 07:57:07.849901 1 shared_informer.go:350] "Waiting for caches to sync" controller="feature gate accessor" 2026-02-02T07:57:07.950056079Z INFO MHCChecker found termination handler MHC, will ignore Nodes with Terminating condition 2026-02-02T07:57:07.950094117Z INFO MHCChecker MHC Checker status changed, notifying NHC controller I0202 07:57:07.950125 1 shared_informer.go:357] "Caches are synced" controller="feature gate accessor" I0202 07:57:07.950145 1 simple_featuregate_reader.go:171] Starting feature-gate-detector 2026-02-02T07:57:07.950155758Z INFO adding all NHCs to reconcile queue for handling MHC event 2026-02-02T07:57:07.950144862Z INFO Starting Controller {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck"} 2026-02-02T07:57:07.950173322Z INFO Starting workers {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "worker count": 1} 2026-02-02T07:57:07.95026166Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:57:07.950322981Z INFO FeatureGateAccessor FeatureGates initialized I0202 07:57:07.950530 1 recorder_logging.go:49] &Event{ObjectMeta:{dummy.18905eed8eab29fe.ec1e899a dummy 0 0001-01-01 00:00:00 +0000 UTC map[] map[] [] [] []},InvolvedObject:ObjectReference{Kind:Pod,Namespace:dummy,Name:dummy,UID:,APIVersion:v1,ResourceVersion:,FieldPath:,},Reason:FeatureGatesInitialized,Message:FeatureGates updated to featuregates.Features{Enabled:[]v1.FeatureGateName{"AdditionalRoutingCapabilities", "AdminNetworkPolicy", "AlibabaPlatform", "AzureWorkloadIdentity", "BuildCSIVolumes", "CPMSMachineNamePrefix", "ConsolePluginContentSecurityPolicy", "ExternalOIDC", "ExternalOIDCWithUIDAndExtraClaimMappings", 
"GCPClusterHostedDNSInstall", "GatewayAPI", "GatewayAPIController", "HighlyAvailableArbiter", "ImageStreamImportMode", "ImageVolume", "KMSv1", "MachineConfigNodes", "ManagedBootImages", "ManagedBootImagesAWS", "ManagedBootImagesAzure", "ManagedBootImagesvSphere", "MetricsCollectionProfiles", "NetworkDiagnosticsConfig", "NetworkLiveMigration", "NetworkSegmentation", "NewOLM", "NewOLMWebhookProviderOpenshiftServiceCA", "PinnedImages", "PreconfiguredUDNAddresses", "ProcMountType", "RouteAdvertisements", "RouteExternalCertificate", "ServiceAccountTokenNodeBinding", "SigstoreImageVerification", "SigstoreImageVerificationPKI", "StoragePerformantSecurityPolicy", "UpgradeStatus", "UserNamespacesPodSecurityStandards", "UserNamespacesSupport", "VSphereMultiDisk", "VSphereMultiNetworks", "VolumeAttributesClass"}, Disabled:[]v1.FeatureGateName{"AWSClusterHostedDNS", "AWSClusterHostedDNSInstall", "AWSDedicatedHosts", "AWSDualStackInstall", "AWSServiceLBNetworkSecurityGroup", "AutomatedEtcdBackup", "AzureClusterHostedDNSInstall", "AzureDedicatedHosts", "AzureDualStackInstall", "AzureMultiDisk", "BootImageSkewEnforcement", "BootcNodeManagement", "CBORServingAndStorage", "CRDCompatibilityRequirementOperator", "ClientsAllowCBOR", "ClientsPreferCBOR", "ClusterAPIInstall", "ClusterAPIInstallIBMCloud", "ClusterAPIMachineManagement", "ClusterAPIMachineManagementVSphere", "ClusterMonitoringConfig", "ClusterVersionOperatorConfiguration", "DNSNameResolver", "DualReplica", "DyanmicServiceEndpointIBMCloud", "EtcdBackendQuota", "EventTTL", "EventedPLEG", "Example", "Example2", "ExternalSnapshotMetadata", "GCPClusterHostedDNS", "GCPCustomAPIEndpoints", "GCPCustomAPIEndpointsInstall", "GCPDualStackInstall", "HyperShiftOnlyDynamicResourceAllocation", "ImageModeStatusReporting", "IngressControllerDynamicConfigurationManager", "InsightsConfig", "InsightsOnDemandDataGather", "IrreconcilableMachineConfig", "KMSEncryptionProvider", "MachineAPIMigration", 
"MachineAPIOperatorDisableMachineHealthCheckController", "ManagedBootImagesCPMS", "MaxUnavailableStatefulSet", "MinimumKubeletVersion", "MixedCPUsAllocation", "MultiArchInstallAzure", "MultiDiskSetup", "MutableCSINodeAllocatableCount", "MutatingAdmissionPolicy", "NewOLMBoxCutterRuntime", "NewOLMCatalogdAPIV1Metas", "NewOLMOwnSingleNamespace", "NewOLMPreflightPermissionChecks", "NoRegistryClusterInstall", "NutanixMultiSubnets", "OSStreams", "OVNObservability", "OnPremDNSRecords", "OpenShiftPodSecurityAdmission", "ProvisioningRequestAvailable", "SELinuxMount", "ShortCertRotation", "SignatureStores", "TranslateStreamCloseWebsocketRequests", "VSphereConfigurableMaxAllowedBlockVolumesPerNode", "VSphereHostVMGroupZonal", "VSphereMixedNodeEnv", "VolumeGroupSnapshot"}},Source:EventSource{Component:,Host:,},FirstTimestamp:2026-02-02 07:57:07.950447102 +0000 UTC m=+1804.179245881,LastTimestamp:2026-02-02 07:57:07.950447102 +0000 UTC m=+1804.179245881,Count:1,Type:Normal,EventTime:0001-01-01 00:00:00 +0000 UTC,Series:nil,Action:,Related:nil,ReportingController:,ReportingInstance:,} 2026-02-02T07:57:07.955734324Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2026-02-02T07:57:07.955785209Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation template CRs {"kind": "SelfNodeRemediationTemplate"} 2026-02-02T07:57:07.955795045Z INFO Starting EventSource {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "source": "kind source: *unstructured.Unstructured"} 2026-02-02T07:57:07.955804241Z INFO controllers.NodeHealthCheck.WatchManager added watch for remediation CRs {"kind": "SelfNodeRemediation"} 2026-02-02T07:57:07.960748601Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} 
time="2026-02-02T07:57:07Z" level=info msg="invalidating lease" time="2026-02-02T07:57:07Z" level=info msg="getting lease" 2026-02-02T07:57:08.050890253Z INFO Starting Controller {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck"} 2026-02-02T07:57:08.050922806Z INFO Starting workers {"controller": "machinehealthcheck", "controllerGroup": "machine.openshift.io", "controllerKind": "MachineHealthCheck", "worker count": 1} 2026-02-02T07:57:08.062626479Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T07:57:08.064137635Z ERROR controllers.NodeHealthCheck.resource manager failed to get remediation CRs for healthy node {"node": "worker-1", "error": "failed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion self-node-remediation.medik8s.io/v1alpha1: selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope", "errorVerbose": "selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope\nfailed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion 
self-node-remediation.medik8s.io/v1alpha1\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ListRemediationCRs\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:277\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).HandleHealthyNode\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:322\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:279\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"} github.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).HandleHealthyNode /app/node-healthcheck-operator/controllers/resources/manager.go:326 github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:279 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 
sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202 2026-02-02T07:57:08.064235582Z ERROR controllers.NodeHealthCheck failed to handle healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1", "error": "failed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion self-node-remediation.medik8s.io/v1alpha1: selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope", "errorVerbose": "selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope\nfailed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion 
self-node-remediation.medik8s.io/v1alpha1\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ListRemediationCRs\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:277\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).HandleHealthyNode\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:322\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:279\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"} github.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile /app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:281 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler 
/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1 /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202 2026-02-02T07:57:08.065749282Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":0,"healthyNodes":0,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:40:20Z"}, "patch": "{\"status\":{\"healthyNodes\":0,\"observedNodes\":0}}"} 2026-02-02T07:57:08.264977195Z INFO console-plugin successfully created / updated console plugin resources 2026-02-02T07:57:08.273059626Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": "failed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion self-node-remediation.medik8s.io/v1alpha1: selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" 
cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope", "errorCauses": [{"error": "failed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion self-node-remediation.medik8s.io/v1alpha1: selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope", "errorVerbose": "selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope\nfailed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion 
self-node-remediation.medik8s.io/v1alpha1\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ListRemediationCRs\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:277\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).HandleHealthyNode\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:322\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:279\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"}], "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:57:08.273162157Z ERROR Reconciler error {"controller": "nodehealthcheck", "controllerGroup": "remediation.medik8s.io", "controllerKind": "NodeHealthCheck", "NodeHealthCheck": {"name":"nhc-worker-test"}, "namespace": "", "name": "nhc-worker-test", "reconcileID": "2a9c2ba2-9a73-4f99-981b-e8d1ada45202", "error": "failed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion self-node-remediation.medik8s.io/v1alpha1: 
selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope", "errorCauses": [{"error": "failed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion self-node-remediation.medik8s.io/v1alpha1: selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope", "errorVerbose": "selfnoderemediations.self-node-remediation.medik8s.io is forbidden: User \"system:serviceaccount:openshift-workload-availability:node-healthcheck-controller-manager\" cannot list resource \"selfnoderemediations\" in API group \"self-node-remediation.medik8s.io\" at the cluster scope\nfailed to get all remediation objects with kind self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation and apiVersion 
self-node-remediation.medik8s.io/v1alpha1\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).ListRemediationCRs\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:277\ngithub.com/medik8s/node-healthcheck-operator/controllers/resources.(*manager).HandleHealthyNode\n\t/app/node-healthcheck-operator/controllers/resources/manager.go:322\ngithub.com/medik8s/node-healthcheck-operator/controllers.(*NodeHealthCheckReconciler).Reconcile\n\t/app/node-healthcheck-operator/controllers/nodehealthcheck_controller.go:279\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:119\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:340\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1\n\t/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1693"}]} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:353 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem /app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:300 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.1 
/app/node-healthcheck-operator/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:202 2026-02-02T07:57:08.278612753Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:57:08.286889908Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:57:08Z" level=info msg="invalidating lease" time="2026-02-02T07:57:08Z" level=info msg="getting lease" 2026-02-02T07:57:08.289052333Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T07:57:08Z" level=info msg="invalidating lease" time="2026-02-02T07:57:08Z" level=info msg="getting lease" 2026-02-02T07:57:08.290885156Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:57:08Z"}, "patch": "{\"status\":{\"healthyNodes\":2,\"observedNodes\":2}}"} 2026-02-02T07:57:08.497735938Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T07:59:37.087344817Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 
2026-02-02T07:59:37.087448985Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T07:59:37.087517999Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T07:59:37.094363471Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown", "duration left": "29.905639394s"} 2026-02-02T07:59:37.104370902Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T07:59:37Z" level=info msg="invalidating lease" time="2026-02-02T07:59:37Z" level=info msg="getting lease" 2026-02-02T07:59:37.205504728Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":1,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:57:08Z"}, "patch": "{\"status\":{\"healthyNodes\":1}}"} 2026-02-02T07:59:37.416025024Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "30.905639394s"} 2026-02-02T08:00:08.322347365Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:00:08.326522658Z INFO controllers.NodeHealthCheck Node 
matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T08:00:08.326707207Z DEBUG events [remediation] Node matches unhealthy condition. Node "worker-1", condition type "Ready", condition status "Unknown" {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4327909"}, "reason": "DetectedUnhealthy"} 2026-02-02T08:00:08.331161709Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:00:08Z" level=info msg="invalidating lease" time="2026-02-02T08:00:08Z" level=info msg="getting lease" 2026-02-02T08:00:08.332719687Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:00:08.335838123Z INFO controllers.NodeHealthCheck.resource manager Attempting to obtain Node Lease {"Node name": "worker-1"} time="2026-02-02T08:00:08Z" level=info msg="request lease" time="2026-02-02T08:00:08Z" level=info msg="getting lease" time="2026-02-02T08:00:08Z" level=info msg="create lease" 2026-02-02T08:00:08.33878751Z INFO controllers.NodeHealthCheck.resource manager Creating a remediation CR {"CR name": "", "CR kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} 2026-02-02T08:00:08.346408636Z DEBUG events [remediation] Created remediation object for node worker-1 {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4327909"}, "reason": "RemediationCreated"} 2026-02-02T08:00:08.349361382Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": 
{"observedNodes":2,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-x5n4b","uid":"b3aeb70e-8bb6-46c0-8739-bae0d84f773b","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T08:00:08Z","templateName":"selfnoderemediationtemplate-sample"}]}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2026-02-02T07:59:37Z"}, "patch": "{\"status\":{\"phase\":\"Remediating\",\"reason\":\"NHC is remediating 1 nodes\",\"unhealthyNodes\":[{\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-x5n4b\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b3aeb70e-8bb6-46c0-8739-bae0d84f773b\"},\"started\":\"2026-02-02T08:00:08Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T08:00:08.356585769Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:00:08.356645987Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation 
CR Kind": "SelfNodeRemediation"} 2026-02-02T08:00:08.356660961Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:00:08.356667079Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:00:08.361789448Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:00:08.36182383Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:00:08.361835669Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:00:08.361839447Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:00:08.379294117Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:00:08.379343181Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", 
"Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:00:08.379358513Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:00:08.379364835Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:00:08.557257141Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T08:00:08.55734759Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:00:08.561996462Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T08:00:08.566806712Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T08:00:13Z"} 2026-02-02T08:00:08.566847185Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:00:08Z" level=info msg="invalidating lease" time="2026-02-02T08:00:08Z" level=info msg="getting lease" 2026-02-02T08:00:08.569091194Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:00:08.573013989Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-1-x5n4b", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T08:00:08Z" level=info msg="getting lease" 
2026-02-02T08:00:08.573067059Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T08:00:08.575457388Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T08:02:09.101456185Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:02:09.101521517Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:02:09.101544044Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:02:09.101548198Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:02:09.101589696Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:02:09.106379347Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T08:02:09.112646079Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "Unknown", "reason": "", "message": "", "lastTransition": "2026-02-02T08:00:13Z"} 
2026-02-02T08:02:09.112780965Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:02:09Z" level=info msg="invalidating lease" time="2026-02-02T08:02:09Z" level=info msg="getting lease" 2026-02-02T08:02:09.118671538Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:02:09.215051521Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-1-x5n4b", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T08:02:09Z" level=info msg="getting lease" 2026-02-02T08:02:09.215173334Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} time="2026-02-02T08:02:09Z" level=info msg="request lease" time="2026-02-02T08:02:09Z" level=info msg="getting lease" time="2026-02-02T08:02:09Z" level=info msg="renew lease owned by NodeHealthCheck-nhc-worker-test setAcquireTime=false" 2026-02-02T08:02:09.249476622Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:02:09.249522641Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:02:09.249536972Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 
2026-02-02T08:02:09.249543395Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:02:09.250990535Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T08:02:09.251078302Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:02:09.274293932Z INFO controllers.NodeHealthCheck Node matches unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "Unknown"} 2026-02-02T08:02:09.313593269Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2026-02-02T08:02:14Z"} 2026-02-02T08:02:09.313632651Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:02:09Z" level=info msg="invalidating lease" time="2026-02-02T08:02:09Z" level=info msg="getting lease" 2026-02-02T08:02:09.413307525Z INFO controllers.NodeHealthCheck handling unhealthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:02:09.418217671Z INFO controllers.NodeHealthCheck.resource manager external remediation CR already exists {"CR name": "worker-1-x5n4b", "kind": "SelfNodeRemediation", "namespace": "openshift-workload-availability"} time="2026-02-02T08:02:09Z" level=info msg="getting lease" 2026-02-02T08:02:09.418336394Z INFO controllers.NodeHealthCheck.nhc lease manager managing lease - about to try to acquire/extended the lease {"NodeHealthCheck name": "nhc-worker-test", "lease name": "node-worker-1", "NHC is lease owner": true, "lease expiration time": "10m0s"} 2026-02-02T08:02:09.420599237Z INFO controllers.NodeHealthCheck 
reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "10m0s"} 2026-02-02T08:03:50.478628553Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T08:03:50.478727288Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T08:03:50.479215522Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:03:50.488015145Z INFO controllers.NodeHealthCheck Node is going to match unhealthy condition {"node": "worker-1", "condition type": "Ready", "condition status": "False", "duration left": "35.511987773s"} 2026-02-02T08:03:50.496273219Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2026-02-02T08:02:14Z"} 2026-02-02T08:03:50.496334008Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:03:50Z" level=info msg="invalidating lease" time="2026-02-02T08:03:50Z" level=info msg="getting lease" 2026-02-02T08:03:50.499792247Z INFO controllers.NodeHealthCheck Ignoring node, because it was unhealthy, and is likely to be unhealthy again. 
{"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:03:50.500130615Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "36.511987773s"} 2026-02-02T08:03:51.798154682Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T08:03:51.798230056Z INFO adding NHC to reconcile queue for handling node {"node": "worker-1", "NHC": "nhc-worker-test"} 2026-02-02T08:03:51.798286079Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:03:51.813441221Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2026-02-02T08:02:14Z"} 2026-02-02T08:03:51.81347977Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:03:51Z" level=info msg="invalidating lease" time="2026-02-02T08:03:51Z" level=info msg="getting lease" 2026-02-02T08:03:51.91375515Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:03:51.916805929Z INFO controllers.NodeHealthCheck.resource manager setting a delay for node getting healthy {"node name": "worker-1", "delay in seconds": 5} 2026-02-02T08:03:51.928356269Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:03:51.928432445Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:03:51.928447358Z INFO 
controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:03:51.928453379Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:03:51.928762611Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":1,"unhealthyNodes":[{"name":"worker-1","remediations":[{"resource":{"kind":"SelfNodeRemediation","namespace":"openshift-workload-availability","name":"worker-1-x5n4b","uid":"b3aeb70e-8bb6-46c0-8739-bae0d84f773b","apiVersion":"self-node-remediation.medik8s.io/v1alpha1"},"started":"2026-02-02T08:00:08Z","templateName":"selfnoderemediationtemplate-sample"}],"conditionsHealthyTimestamp":"2026-02-02T08:03:51Z","healthyDelayed":true}],"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Remediating","reason":"NHC is remediating 1 nodes","lastUpdateTime":"2026-02-02T08:00:08Z"}, "patch": 
"{\"status\":{\"unhealthyNodes\":[{\"conditionsHealthyTimestamp\":\"2026-02-02T08:03:51Z\",\"healthyDelayed\":true,\"name\":\"worker-1\",\"remediations\":[{\"resource\":{\"apiVersion\":\"self-node-remediation.medik8s.io/v1alpha1\",\"kind\":\"SelfNodeRemediation\",\"name\":\"worker-1-x5n4b\",\"namespace\":\"openshift-workload-availability\",\"uid\":\"b3aeb70e-8bb6-46c0-8739-bae0d84f773b\"},\"started\":\"2026-02-02T08:00:08Z\",\"templateName\":\"selfnoderemediationtemplate-sample\"}]}]}}"} 2026-02-02T08:03:52.135361276Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "6s"} 2026-02-02T08:03:52.13546307Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:03:52.148327909Z INFO controllers.NodeHealthCheck found condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2026-02-02T08:02:14Z"} 2026-02-02T08:03:52.148364613Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:03:52Z" level=info msg="invalidating lease" time="2026-02-02T08:03:52Z" level=info msg="getting lease" 2026-02-02T08:03:52.150174144Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:03:52.152180302Z INFO controllers.NodeHealthCheck.resource manager delaying node getting healthy {"node name": "worker-1", "remaining time in seconds": 3.84782219} 2026-02-02T08:03:52.152434612Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "4.84782219s"} 2026-02-02T08:03:57.000459878Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:03:57.009479803Z INFO controllers.NodeHealthCheck found 
condition {"NodeHealthCheck name": "nhc-worker-test", "type": "Succeeded", "status": "True", "reason": "", "message": "", "lastTransition": "2026-02-02T08:02:14Z"} 2026-02-02T08:03:57.009517641Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:03:57Z" level=info msg="invalidating lease" time="2026-02-02T08:03:57Z" level=info msg="getting lease" 2026-02-02T08:03:57.011448434Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} 2026-02-02T08:03:57.013119441Z INFO controllers.NodeHealthCheck.resource manager delaying for node getting healthy is done, about to remove the remediation CR {"node name": "worker-1"} 2026-02-02T08:03:57.019809846Z INFO controllers.NodeHealthCheck.resource manager deleted remediation CR {"name": "worker-1-x5n4b"} 2026-02-02T08:03:57.020008187Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:03:57.020035506Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:03:57.020048824Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:03:57.020053192Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:03:57.020042825Z DEBUG events [remediation] Deleted remediation CR of kind 
SelfNodeRemediation with name worker-1-x5n4b {"type": "Normal", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4330349"}, "reason": "RemediationRemoved"} 2026-02-02T08:03:57.020625869Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "11s"} 2026-02-02T08:03:57.020692874Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:03:57.04664563Z INFO controllers.NodeHealthCheck.WatchManager Request info {"owner ref": {"apiVersion":"remediation.medik8s.io/v1alpha1","kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","controller":false}} 2026-02-02T08:03:57.046687558Z INFO controllers.NodeHealthCheck.WatchManager mapper: found NHC for remediation CR {"NHC Name": "nhc-worker-test", "Remediation CR Name": "worker-1-x5n4b", "Remediation CR Kind": "SelfNodeRemediation"} 2026-02-02T08:03:57.110403979Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T08:03:57Z" level=info msg="invalidating lease" time="2026-02-02T08:03:57Z" level=info msg="getting lease" 2026-02-02T08:03:57.116789022Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:03:57Z" level=info msg="invalidating lease" time="2026-02-02T08:03:57Z" level=info msg="getting lease" 2026-02-02T08:03:57.211576905Z INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is 
enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T08:03:51Z"}, "patch": "{\"status\":{\"healthyNodes\":2,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"} 2026-02-02T08:03:57.42415421Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T08:03:57.424255615Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:03:57.434504254Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:03:57Z" level=info msg="invalidating lease" time="2026-02-02T08:03:57Z" level=info msg="getting lease" 2026-02-02T08:03:57.436388579Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T08:03:57Z" level=info msg="invalidating lease" time="2026-02-02T08:03:57Z" level=info msg="getting lease" 2026-02-02T08:03:57.438283366Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} 2026-02-02T08:04:08.021301117Z INFO controllers.NodeHealthCheck reconciling {"NodeHealthCheck name": "nhc-worker-test"} 2026-02-02T08:04:08.029615922Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-0"} time="2026-02-02T08:04:08Z" level=info msg="invalidating lease" 
time="2026-02-02T08:04:08Z" level=info msg="getting lease" 2026-02-02T08:04:08.031390467Z INFO controllers.NodeHealthCheck handling healthy node {"NodeHealthCheck name": "nhc-worker-test", "node": "worker-1"} time="2026-02-02T08:04:08Z" level=info msg="invalidating lease" time="2026-02-02T08:04:08Z" level=info msg="getting lease" 2026-02-02T08:04:08.033315463Z INFO controllers.NodeHealthCheck reconcile end {"NodeHealthCheck name": "nhc-worker-test", "error": null, "requeue": false, "requeuAfter": "0s"} Logs To Validate the Storm Cooldown Duration: INFO controllers.NodeHealthCheck Storm recovery mode activated {"nhc": "nhc-worker-test"} {"type":"StormCooldownActive","status":"False","lastTransitionTime":"reason":"HealthyNodeThresholdChange","message":"Cooldown cleared - storm threshold triggered"}, INFO controllers.NodeHealthCheck The cluster regained health after the storm, a cooldown period now begins as a safety measure before normal operations resume. {"nhc": "nhc-worker-test"} INFO controllers.NodeHealthCheck Storm recovery active: skipping creation of new remediations DEBUG events [remediation] Storm recovery mode will exit after cooldown delay {"type": "Warning", "object": {"kind":"NodeHealthCheck","name":"nhc-worker-test","uid":"f92dbd24-256c-4aa8-a5d4-dbe2298c21d2","apiVersion":"remediation.medik8s.io/v1alpha1","resourceVersion":"4300561"}, "reason": "StormRecoveryCooldownStarted"} {"type":"StormCooldownActive","status":"True","lastTransitionTime":"reason":"HealthyNodeThresholdChange","message":"Storm cooldown delay started - waiting before resuming normal remediation"}, INFO controllers.NodeHealthCheck Patching NHC status {"NodeHealthCheck name": "nhc-worker-test", "new status": {"observedNodes":2,"healthyNodes":2,"conditions":[{"type":"Disabled","status":"False","lastTransitionTime":"2026-02-02T07:03:56Z","reason":"NodeHealthCheckEnabled","message":"No issues found, NodeHealthCheck is 
enabled."},{"type":"StormCooldownActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm cooldown completed"},{"type":"StormActive","status":"False","lastTransitionTime":"2026-02-02T07:34:52Z","reason":"HealthyNodeThresholdChange","message":"Storm mode is deactivated, remediation can occur normally"}],"phase":"Enabled","reason":"NHC is enabled, no ongoing remediation","lastUpdateTime":"2026-02-02T07:39:54Z"}, "patch": "{\"status\":{\"healthyNodes\":2,\"phase\":\"Enabled\",\"reason\":\"NHC is enabled, no ongoing remediation\",\"unhealthyNodes\":null}}"}