[root@cert-rhosp-02 ~]# oc project openshift-workload-availability Now using project "openshift-workload-availability" on server "https://api.test-rhwa-25-9.vipin:6443". [root@cert-rhosp-02 ~]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.20.0 True False 140m Cluster version is 4.20.0 [root@cert-rhosp-02 ~]# oc get csv NAME DISPLAY VERSION REPLACES PHASE node-healthcheck-operator.v0.10.0 Node Health Check Operator 0.10.0 node-healthcheck-operator.v0.9.1 Succeeded self-node-remediation.v0.10.0 Self Node Remediation Operator 0.10.0 self-node-remediation.v0.9.0 Succeeded [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep node-healthcheck-controller-manager) [root@cert-rhosp-02 ~]# echo $PODS pod/node-healthcheck-controller-manager-54b7bc794d-q2hsl pod/node-healthcheck-controller-manager-54b7bc794d-whzdk [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/node-healthcheck-controller-manager-54b7bc794d-q2hsl "node2" == pod/node-healthcheck-controller-manager-54b7bc794d-whzdk "node0" [root@cert-rhosp-02 ~]# PODS=$(oc get pods -o name -n openshift-workload-availability | grep self-node-remediation-controller-manager) [root@cert-rhosp-02 ~]# echo $PODS pod/self-node-remediation-controller-manager-678856d898-462d5 [root@cert-rhosp-02 ~]# for p in $PODS; do > echo "== $p" > oc get "$p" -n openshift-workload-availability -o json | jq .spec.nodeName > done == pod/self-node-remediation-controller-manager-678856d898-462d5 "node2" [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION node0 Ready control-plane,master,worker 4h15m v1.33.5 node2 Ready control-plane,master,worker 144m v1.33.5 [root@cert-rhosp-02 ~]# oc get nhc -o yaml apiVersion: v1 items: - apiVersion: remediation.medik8s.io/v1alpha1 kind: NodeHealthCheck metadata: creationTimestamp: "2025-11-05T19:27:47Z" generation: 1 name: nhc-worker-test resourceVersion: "75324" uid: b3e3e728-5fa5-4b65-94e4-7327369bb158 spec: maxUnhealthy: 1 remediationTemplate: apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate name: selfnoderemediationtemplate-sample namespace: openshift-machine-api selector: matchExpressions: - key: node-role.kubernetes.io/worker operator: Exists unhealthyConditions: - duration: 30s status: "False" type: Ready - duration: 30s status: Unknown type: Ready status: conditions: - lastTransitionTime: "2025-11-05T19:28:02Z" message: No issues found, NodeHealthCheck is enabled. reason: NodeHealthCheckEnabled status: "False" type: Disabled healthyNodes: 2 lastUpdateTime: "2025-11-05T19:28:02Z" observedNodes: 2 phase: Enabled reason: NHC is enabled, no ongoing remediation kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc get snrt -o yaml apiVersion: v1 items: - apiVersion: self-node-remediation.medik8s.io/v1alpha1 kind: SelfNodeRemediationTemplate metadata: annotations: remediation.medik8s.io/multiple-templates-support: "true" creationTimestamp: "2025-11-05T19:12:48Z" generation: 1 labels: remediation.medik8s.io/default-template: "true" name: self-node-remediation-automatic-strategy-template namespace: openshift-workload-availability resourceVersion: "71449" uid: 5b6afac4-2b9a-422f-86cb-25e6f32063eb spec: template: spec: remediationStrategy: Automatic kind: List metadata: resourceVersion: "" [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION node0 NotReady control-plane,master,worker 4h29m v1.33.5 node2 Ready control-plane,master,worker 158m v1.33.5 [root@cert-rhosp-02 ~]# oc get nodes -l 'node-role.kubernetes.io/worker' NAME STATUS ROLES AGE VERSION node0 Ready control-plane,master,worker 4h34m v1.33.5 node2 Ready control-plane,master,worker 163m v1.33.5 NHC logs: 2025-11-05T19:36:12.344708359Z INFO setup Go Version: go1.23.2 (Red Hat 1.23.2-1.el9) X:strictfipsruntime 2025-11-05T19:36:12.344787359Z INFO setup Go OS/Arch: linux/amd64 2025-11-05T19:36:12.344790324Z INFO setup Operator Version: v0.10.0 2025-11-05T19:36:12.344792022Z INFO setup Git Commit: 2025-11-05T19:36:12.344793878Z INFO setup Build Date: 2025-01-13T11:55:12+00:00 2025-11-05T19:36:12.344795453Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-11-05T19:36:12.345201609Z INFO controller-runtime.metrics Metrics server is starting to listen {"addr": "127.0.0.1:8080"} 2025-11-05T19:36:12.35142477Z INFO utils-taints out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 33} 2025-11-05T19:36:12.351461218Z INFO utils-taints out of service taint strategy {"isGA": true, "k8sMajorVersion": 1, "k8sMinorVersion": 33} 2025-11-05T19:36:12.35146796Z INFO setup Starting as a manager that installs the daemonset 2025-11-05T19:36:12.351477811Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig"} 2025-11-05T19:36:12.351517084Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-11-05T19:36:12.351608724Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-11-05T19:36:12.351655688Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.351694244Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.351721648Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.35176063Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.351785507Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation"} 2025-11-05T19:36:12.351810101Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-11-05T19:36:12.351842117Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-11-05T19:36:12.351889585Z INFO setup starting manager 2025-11-05T19:36:12.352053818Z INFO controller-runtime.webhook.webhooks Starting webhook server 2025-11-05T19:36:12.352089539Z INFO Starting server {"kind": "health probe", "addr": "[::]:8081"} 2025-11-05T19:36:12.352190464Z INFO starting server {"path": "/metrics", "kind": "metrics", "addr": "127.0.0.1:8080"} 2025-11-05T19:36:12.352346829Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-11-05T19:36:12.352402053Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-11-05T19:36:12.352464131Z INFO controller-runtime.certwatcher Starting certificate watcher I1105 19:36:12.352521 1 leaderelection.go:245] attempting to acquire leader lease openshift-workload-availability/547f6cb6.medik8s.io... E1105 19:36:14.281712 1 leaderelection.go:327] error retrieving resource lock openshift-workload-availability/547f6cb6.medik8s.io: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34720->192.168.122.241:2379: read: connection timed out I1105 19:36:35.583675 1 leaderelection.go:255] successfully acquired lease openshift-workload-availability/547f6cb6.medik8s.io 2025-11-05T19:36:35.58370362Z DEBUG events self-node-remediation-controller-manager-678856d898-462d5_4e277798-a7bf-4e5a-b753-dc31f19582da became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"547f6cb6.medik8s.io","uid":"2f336e17-49df-44d1-b8b9-f23084dd3546","apiVersion":"coordination.k8s.io/v1","resourceVersion":"80553"}, "reason": "LeaderElection"} 2025-11-05T19:36:35.584008769Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1alpha1.SelfNodeRemediationConfig"} 2025-11-05T19:36:35.584041634Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1.DaemonSet"} 2025-11-05T19:36:35.584046413Z INFO Starting Controller {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig"} 2025-11-05T19:36:35.584091934Z INFO Starting EventSource {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "source": "kind source: *v1alpha1.SelfNodeRemediation"} 2025-11-05T19:36:35.584109812Z INFO Starting Controller {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation"} 2025-11-05T19:36:35.589776634Z INFO selfnoderemediationconfig-resource validate create {"name": "self-node-remediation-config"} 2025-11-05T19:36:35.686540822Z INFO Starting workers {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "worker count": 1} 2025-11-05T19:36:35.686574475Z INFO Starting workers {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "worker count": 1} 2025-11-05T19:36:35.686679134Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-05T19:36:35.788858002Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-05T19:36:35.788894575Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-05T19:36:35.78887123Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:36:35.789403356Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "node name": "node0", "time left": "55.210602793s"} 2025-11-05T19:36:35.789552798Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/05 19:36:35 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025-11-05T19:36:35.794524378Z ERROR controllers.SelfNodeRemediation failed to update snr status {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "error": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:59520->192.168.122.241:2379: read: connection reset by peer"} github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).updateSnrStatus /remote-source/app/controllers/selfnoderemediation_controller.go:671 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).ReconcileManager.func1 /remote-source/app/controllers/selfnoderemediation_controller.go:213 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).ReconcileManager /remote-source/app/controllers/selfnoderemediation_controller.go:302 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).Reconcile /remote-source/app/controllers/selfnoderemediation_controller.go:155 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794582857Z ERROR Reconciler error {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "SelfNodeRemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "namespace": "openshift-machine-api", "name": "node0-6ngtd", "reconcileID": "85a263cb-ad63-40d9-af2b-096c2de5a28b", "error": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:59520->192.168.122.241:2379: read: connection reset by peer", "errorCauses": [{"error": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:59520->192.168.122.241:2379: read: connection reset by peer"}]} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:324 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794489218Z ERROR controllers.SelfNodeRemediationConfig.syncConfigDaemonset Couldn't sync self-node-remediation daemons objects {"error": "failed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}: could not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer", "errorVerbose": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer\ncould not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds\ngithub.com/medik8s/self-node-remediation/pkg/apply.ApplyObject\n\t/remote-source/app/pkg/apply/apply.go:49\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:196\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700\nfailed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:197\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet /remote-source/app/controllers/selfnoderemediationconfig_controller.go:180 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile /remote-source/app/controllers/selfnoderemediationconfig_controller.go:100 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794611568Z ERROR controllers.SelfNodeRemediationConfig error syncing DS {"selfnoderemediationconfig": {"name":"self-node-remediation-config","namespace":"openshift-workload-availability"}, "error": "failed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}: could not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer", "errorVerbose": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer\ncould not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds\ngithub.com/medik8s/self-node-remediation/pkg/apply.ApplyObject\n\t/remote-source/app/pkg/apply/apply.go:49\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:196\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700\nfailed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:197\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile /remote-source/app/controllers/selfnoderemediationconfig_controller.go:104 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794697863Z ERROR Reconciler error {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "SelfNodeRemediationConfig": {"name":"self-node-remediation-config","namespace":"openshift-workload-availability"}, "namespace": "openshift-workload-availability", "name": "self-node-remediation-config", "reconcileID": "31f2e49b-e4ee-4d05-8b2c-3d60506432af", "error": "failed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}: could not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer", "errorVerbose": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer\ncould not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds\ngithub.com/medik8s/self-node-remediation/pkg/apply.ApplyObject\n\t/remote-source/app/pkg/apply/apply.go:49\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:196\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700\nfailed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:197\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:324 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794808631Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-05T19:36:35.79482749Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-05T19:36:35.794829959Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-05T19:36:35.798129381Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/05 19:36:35 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025-11-05T19:36:35.799806908Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:36:35.799816492Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "node name": "node0", "time left": "55.200184332s"} 2025/11/05 19:36:35 update was successful 2025-11-05T19:36:35.808623799Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-05T19:36:35.808646992Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-05T19:36:35.808651959Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-05T19:36:35.809229162Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/05 19:36:35 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/11/05 19:36:35 update was successful 2025-11-05T19:36:45.591244725Z INFO selfnoderemediationtemplate-resource default {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-05T19:36:45.594048772Z INFO selfnoderemediationtemplate-resource validate create {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-05T19:37:31.005581165Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:31.00560163Z INFO controllers.SelfNodeRemediation TimeAssumedRebooted is old. The unhealthy node assumed to been rebooted {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "node name": "node0"} 2025-11-05T19:37:31.012927033Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:31.021611667Z INFO controllers.SelfNodeRemediation out-of-service taint added {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-05T19:34:57Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-05T19:37:28Z"},{"key":"node.kubernetes.io/out-of-service","value":"nodeshutdown","effect":"NoExecute","timeAdded":"2025-11-05T19:37:31Z"}]} 2025-11-05T19:37:31.02174812Z DEBUG events [remediation] Remediation process - add out-of-service taint to unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"80871"}, "reason": "AddOutOfService"} 2025-11-05T19:37:31.127751958Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "prometheus-operator-admission-webhook-868d999c66-s2b82", "phase": "Running"} 2025-11-05T19:37:36.13359165Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:36.137732271Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "csi-snapshot-controller-5dcc684ddd-rqpvl", "phase": "Running"} 2025-11-05T19:37:41.144224957Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:41.15035736Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "cluster-olm-operator-6475d48494-zjktp", "phase": "Running"} 2025-11-05T19:37:46.15574893Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:46.159524556Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "ovnkube-control-plane-77f5ddd888-zvwb9", "phase": "Running"} 2025-11-05T19:37:51.16555291Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:51.280317853Z INFO controllers.SelfNodeRemediation out-of-service taint removed {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-05T19:34:57Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-05T19:37:28Z"}]} 2025-11-05T19:37:51.280481568Z DEBUG events [remediation] Remediation process - remove out-of-service taint from node {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"81506"}, "reason": "RemoveOutOfService"} 2025-11-05T19:37:51.280512872Z DEBUG events [remediation] Remediation process - finished deleting unhealthy node resources {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"81506"}, "reason": "DeleteResources"} 2025-11-05T19:37:51.287378946Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:32.042099823Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:32.04211872Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:32.050974172Z DEBUG events [remediation] Remediation process - mark healthy remediated node as schedulable {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"82146"}, "reason": "MarkNodeSchedulable"} 2025-11-05T19:39:33.060519753Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:33.060537987Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:33.068524336Z INFO controllers.SelfNodeRemediation NoExecute taint removed {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-05T19:37:28Z"}]} 2025-11-05T19:39:33.068670751Z DEBUG events [remediation] Remediation process - remove NoExecute taint from healthy remediated node {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"82218"}, "reason": "RemoveNoExecuteTaint"} 2025-11-05T19:39:33.077644557Z INFO selfnoderemediation-resource validate update {"name": "node0-6ngtd"} 2025-11-05T19:39:33.094056806Z INFO controllers.SelfNodeRemediation finalizer removed {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:33.094266738Z DEBUG events [remediation] Remediation process - remove finalizer from snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"node0-6ngtd","uid":"6255e50e-0e85-47d8-beb6-846295543095","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"82143"}, "reason": "RemoveFinalizer"} 2025-11-05T19:39:33.094287146Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"node0-6ngtd","uid":"6255e50e-0e85-47d8-beb6-846295543095","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"82143"}, "reason": "RemediationFinished"} 2025-11-05T19:39:33.105252932Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:34.106132325Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} SNR logs: 2025-11-05T19:36:12.344708359Z INFO setup Go Version: go1.23.2 (Red Hat 1.23.2-1.el9) X:strictfipsruntime 2025-11-05T19:36:12.344787359Z INFO setup Go OS/Arch: linux/amd64 2025-11-05T19:36:12.344790324Z INFO setup Operator Version: v0.10.0 2025-11-05T19:36:12.344792022Z INFO setup Git Commit: 2025-11-05T19:36:12.344793878Z INFO setup Build Date: 2025-01-13T11:55:12+00:00 2025-11-05T19:36:12.344795453Z INFO setup HTTP/2 for metrics and webhook server disabled 2025-11-05T19:36:12.345201609Z INFO controller-runtime.metrics Metrics server is starting to listen {"addr": "127.0.0.1:8080"} 2025-11-05T19:36:12.35142477Z INFO utils-taints out of service taint strategy {"isSupported": true, "k8sMajorVersion": 1, "k8sMinorVersion": 33} 2025-11-05T19:36:12.351461218Z INFO utils-taints out of service taint strategy {"isGA": true, "k8sMajorVersion": 1, "k8sMinorVersion": 33} 2025-11-05T19:36:12.35146796Z INFO setup Starting as a manager that installs the daemonset 2025-11-05T19:36:12.351477811Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig"} 2025-11-05T19:36:12.351517084Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationConfig", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-11-05T19:36:12.351608724Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationconfig"} 2025-11-05T19:36:12.351655688Z INFO controller-runtime.builder Registering a mutating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.351694244Z INFO controller-runtime.webhook Registering webhook {"path": "/mutate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.351721648Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediationTemplate", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.35176063Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediationtemplate"} 2025-11-05T19:36:12.351785507Z INFO controller-runtime.builder skip registering a mutating webhook, object does not implement admission.Defaulter or WithDefaulter wasn't called {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation"} 2025-11-05T19:36:12.351810101Z INFO controller-runtime.builder Registering a validating webhook {"GVK": "self-node-remediation.medik8s.io/v1alpha1, Kind=SelfNodeRemediation", "path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-11-05T19:36:12.351842117Z INFO controller-runtime.webhook Registering webhook {"path": "/validate-self-node-remediation-medik8s-io-v1alpha1-selfnoderemediation"} 2025-11-05T19:36:12.351889585Z INFO setup starting manager 2025-11-05T19:36:12.352053818Z INFO controller-runtime.webhook.webhooks Starting webhook server 2025-11-05T19:36:12.352089539Z INFO Starting server {"kind": "health probe", "addr": "[::]:8081"} 2025-11-05T19:36:12.352190464Z INFO starting server {"path": "/metrics", "kind": "metrics", "addr": "127.0.0.1:8080"} 2025-11-05T19:36:12.352346829Z INFO controller-runtime.certwatcher Updated current TLS certificate 2025-11-05T19:36:12.352402053Z INFO controller-runtime.webhook Serving webhook server {"host": "", "port": 9443} 2025-11-05T19:36:12.352464131Z INFO controller-runtime.certwatcher Starting certificate watcher I1105 19:36:12.352521 1 leaderelection.go:245] attempting to acquire leader lease openshift-workload-availability/547f6cb6.medik8s.io... E1105 19:36:14.281712 1 leaderelection.go:327] error retrieving resource lock openshift-workload-availability/547f6cb6.medik8s.io: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34720->192.168.122.241:2379: read: connection timed out I1105 19:36:35.583675 1 leaderelection.go:255] successfully acquired lease openshift-workload-availability/547f6cb6.medik8s.io 2025-11-05T19:36:35.58370362Z DEBUG events self-node-remediation-controller-manager-678856d898-462d5_4e277798-a7bf-4e5a-b753-dc31f19582da became leader {"type": "Normal", "object": {"kind":"Lease","namespace":"openshift-workload-availability","name":"547f6cb6.medik8s.io","uid":"2f336e17-49df-44d1-b8b9-f23084dd3546","apiVersion":"coordination.k8s.io/v1","resourceVersion":"80553"}, "reason": "LeaderElection"} 2025-11-05T19:36:35.584008769Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1alpha1.SelfNodeRemediationConfig"} 2025-11-05T19:36:35.584041634Z INFO Starting EventSource {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "source": "kind source: *v1.DaemonSet"} 2025-11-05T19:36:35.584046413Z INFO Starting Controller {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig"} 2025-11-05T19:36:35.584091934Z INFO Starting EventSource {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "source": "kind source: *v1alpha1.SelfNodeRemediation"} 2025-11-05T19:36:35.584109812Z INFO Starting Controller {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation"} 2025-11-05T19:36:35.589776634Z INFO selfnoderemediationconfig-resource validate create {"name": "self-node-remediation-config"} 2025-11-05T19:36:35.686540822Z INFO Starting workers {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "worker count": 1} 2025-11-05T19:36:35.686574475Z INFO Starting workers {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "worker count": 1} 2025-11-05T19:36:35.686679134Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-05T19:36:35.788858002Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-05T19:36:35.788894575Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-05T19:36:35.78887123Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:36:35.789403356Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "node name": "node0", "time left": "55.210602793s"} 2025-11-05T19:36:35.789552798Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/05 19:36:35 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025-11-05T19:36:35.794524378Z ERROR controllers.SelfNodeRemediation failed to update snr status {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "error": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:59520->192.168.122.241:2379: read: connection reset by peer"} github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).updateSnrStatus /remote-source/app/controllers/selfnoderemediation_controller.go:671 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).ReconcileManager.func1 /remote-source/app/controllers/selfnoderemediation_controller.go:213 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).ReconcileManager /remote-source/app/controllers/selfnoderemediation_controller.go:302 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationReconciler).Reconcile /remote-source/app/controllers/selfnoderemediation_controller.go:155 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794582857Z ERROR Reconciler error {"controller": "selfnoderemediation", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediation", "SelfNodeRemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "namespace": "openshift-machine-api", "name": "node0-6ngtd", "reconcileID": "85a263cb-ad63-40d9-af2b-096c2de5a28b", "error": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:59520->192.168.122.241:2379: read: connection reset by peer", "errorCauses": [{"error": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:59520->192.168.122.241:2379: read: connection reset by peer"}]} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:324 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794489218Z ERROR controllers.SelfNodeRemediationConfig.syncConfigDaemonset Couldn't sync self-node-remediation daemons objects {"error": "failed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}: could not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer", "errorVerbose": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer\ncould not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds\ngithub.com/medik8s/self-node-remediation/pkg/apply.ApplyObject\n\t/remote-source/app/pkg/apply/apply.go:49\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:196\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700\nfailed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:197\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet /remote-source/app/controllers/selfnoderemediationconfig_controller.go:180 github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile /remote-source/app/controllers/selfnoderemediationconfig_controller.go:100 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794611568Z ERROR controllers.SelfNodeRemediationConfig error syncing DS {"selfnoderemediationconfig": {"name":"self-node-remediation-config","namespace":"openshift-workload-availability"}, "error": "failed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}: could not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer", "errorVerbose": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer\ncould not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds\ngithub.com/medik8s/self-node-remediation/pkg/apply.ApplyObject\n\t/remote-source/app/pkg/apply/apply.go:49\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:196\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700\nfailed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:197\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} github.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile /remote-source/app/controllers/selfnoderemediationconfig_controller.go:104 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794697863Z ERROR Reconciler error {"controller": "selfnoderemediationconfig", "controllerGroup": "self-node-remediation.medik8s.io", "controllerKind": "SelfNodeRemediationConfig", "SelfNodeRemediationConfig": {"name":"self-node-remediation-config","namespace":"openshift-workload-availability"}, "namespace": "openshift-workload-availability", "name": "self-node-remediation-config", "reconcileID": "31f2e49b-e4ee-4d05-8b2c-3d60506432af", "error": "failed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}: could not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds: rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer", "errorVerbose": "rpc error: code = Unavailable desc = error reading from server: read tcp 192.168.122.135:34964->192.168.122.241:2379: read: connection reset by peer\ncould not retrieve existing (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds\ngithub.com/medik8s/self-node-remediation/pkg/apply.ApplyObject\n\t/remote-source/app/pkg/apply/apply.go:49\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:196\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700\nfailed to apply object &{map[apiVersion:apps/v1 kind:DaemonSet metadata:map[annotations:map[snr.medik8s.io/force-deletion-revision:1] labels:map[k8s-app:self-node-remediation] name:self-node-remediation-ds namespace:openshift-workload-availability ownerReferences:[map[apiVersion:self-node-remediation.medik8s.io/v1alpha1 blockOwnerDeletion:true controller:true kind:SelfNodeRemediationConfig name:self-node-remediation-config uid:5407dbf3-9635-467a-ac61-162f0a09b857]]] spec:map[selector:map[matchLabels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] template:map[metadata:map[creationTimestamp: labels:map[app.kubernetes.io/component:agent app.kubernetes.io/name:self-node-remediation]] spec:map[affinity:map[nodeAffinity:map[requiredDuringSchedulingIgnoredDuringExecution:map[nodeSelectorTerms:[map[matchExpressions:[map[key:remediation.medik8s.io/exclude-from-remediation operator:NotIn values:[true]]]]]]]] containers:[map[args:[--is-manager=false] command:[/manager] env:[map[name:MY_NODE_NAME valueFrom:map[fieldRef:map[fieldPath:spec.nodeName]]] map[name:DEPLOYMENT_NAMESPACE valueFrom:map[fieldRef:map[fieldPath:metadata.namespace]]] map[name:WATCHDOG_PATH value:/dev/watchdog] map[name:PEER_API_SERVER_TIMEOUT value:5000000000] map[name:API_CHECK_INTERVAL value:15000000000] map[name:PEER_UPDATE_INTERVAL value:900000000000] map[name:API_SERVER_TIMEOUT value:5000000000] map[name:PEER_DIAL_TIMEOUT value:5000000000] map[name:PEER_REQUEST_TIMEOUT value:5000000000] map[name:MAX_API_ERROR_THRESHOLD value:3] map[name:IS_SOFTWARE_REBOOT_ENABLED value:true] map[name:END_POINT_HEALTH_CHECK_URL value:] map[name:HOST_PORT value:30001]] image:registry.redhat.io/workload-availability/self-node-remediation-rhel9-operator@sha256:be5d89d6794bc1a50c59386e8be960e9f13c9668c8e024ff0c32ba23ee75956a imagePullPolicy:Always name:manager ports:[map[containerPort:30001 hostPort:30001 name:self-n-r-port protocol:TCP]] resources:map[requests:map[cpu:20m memory:60Mi]] securityContext:map[privileged:true] terminationMessagePath:/dev/termination-log terminationMessagePolicy:File volumeMounts:[map[mountPath:/dev name:devices]]]] dnsPolicy:ClusterFirst hostPID:true priorityClassName:system-node-critical restartPolicy:Always schedulerName:default-scheduler securityContext:map[] serviceAccountName:self-node-remediation-controller-manager terminationGracePeriodSeconds:10 tolerations:[map[effect:NoExecute key:medik8s.io/remediation operator:Equal value:self-node-remediation] map[effect:NoSchedule key:node-role.kubernetes.io/master operator:Equal] map[effect:NoSchedule key:node-role.kubernetes.io/control-plane operator:Equal]] volumes:[map[hostPath:map[path:/dev type:Directory] name:devices]]]]]]}\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncK8sResource\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:197\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).syncConfigDaemonSet\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:175\ngithub.com/medik8s/self-node-remediation/controllers.(*SelfNodeRemediationConfigReconciler).Reconcile\n\t/remote-source/app/controllers/selfnoderemediationconfig_controller.go:100\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Reconcile\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:314\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2\n\t/remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226\nruntime.goexit\n\t/usr/lib/golang/src/runtime/asm_amd64.s:1700"} sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:324 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:265 sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2 /remote-source/app/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:226 2025-11-05T19:36:35.794808631Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-05T19:36:35.79482749Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-05T19:36:35.794829959Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-05T19:36:35.798129381Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/05 19:36:35 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025-11-05T19:36:35.799806908Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:36:35.799816492Z INFO controllers.SelfNodeRemediation Node didn't reboot yet, waiting for it to reboot {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "node name": "node0", "time left": "55.200184332s"} 2025/11/05 19:36:35 update was successful 2025-11-05T19:36:35.808623799Z INFO controllers.SelfNodeRemediationConfig Syncing certs 2025-11-05T19:36:35.808646992Z INFO controllers.SelfNodeRemediationConfig Cert secret already exists 2025-11-05T19:36:35.808651959Z INFO controllers.SelfNodeRemediationConfig.syncConfigDaemonset Start to sync config daemonset 2025-11-05T19:36:35.809229162Z INFO controllers.SelfNodeRemediationConfig Updating DS tolerations 2025/11/05 19:36:35 reconciling (apps/v1, Kind=DaemonSet) openshift-workload-availability/self-node-remediation-ds 2025/11/05 19:36:35 update was successful 2025-11-05T19:36:45.591244725Z INFO selfnoderemediationtemplate-resource default {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-05T19:36:45.594048772Z INFO selfnoderemediationtemplate-resource validate create {"name": "self-node-remediation-automatic-strategy-template"} 2025-11-05T19:37:31.005581165Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:31.00560163Z INFO controllers.SelfNodeRemediation TimeAssumedRebooted is old. The unhealthy node assumed to been rebooted {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "node name": "node0"} 2025-11-05T19:37:31.012927033Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:31.021611667Z INFO controllers.SelfNodeRemediation out-of-service taint added {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-05T19:34:57Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-05T19:37:28Z"},{"key":"node.kubernetes.io/out-of-service","value":"nodeshutdown","effect":"NoExecute","timeAdded":"2025-11-05T19:37:31Z"}]} 2025-11-05T19:37:31.02174812Z DEBUG events [remediation] Remediation process - add out-of-service taint to unhealthy node {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"80871"}, "reason": "AddOutOfService"} 2025-11-05T19:37:31.127751958Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "prometheus-operator-admission-webhook-868d999c66-s2b82", "phase": "Running"} 2025-11-05T19:37:36.13359165Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:36.137732271Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "csi-snapshot-controller-5dcc684ddd-rqpvl", "phase": "Running"} 2025-11-05T19:37:41.144224957Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:41.15035736Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "cluster-olm-operator-6475d48494-zjktp", "phase": "Running"} 2025-11-05T19:37:46.15574893Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:46.159524556Z INFO controllers.SelfNodeRemediation waiting for terminating pod {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "pod name": "ovnkube-control-plane-77f5ddd888-zvwb9", "phase": "Running"} 2025-11-05T19:37:51.16555291Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:37:51.280317853Z INFO controllers.SelfNodeRemediation out-of-service taint removed {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoSchedule","timeAdded":"2025-11-05T19:34:57Z"},{"key":"medik8s.io/remediation","value":"self-node-remediation","effect":"NoExecute","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unschedulable","effect":"NoSchedule","timeAdded":"2025-11-05T19:35:29Z"},{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-05T19:37:28Z"}]} 2025-11-05T19:37:51.280481568Z DEBUG events [remediation] Remediation process - remove out-of-service taint from node {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"81506"}, "reason": "RemoveOutOfService"} 2025-11-05T19:37:51.280512872Z DEBUG events [remediation] Remediation process - finished deleting unhealthy node resources {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"81506"}, "reason": "DeleteResources"} 2025-11-05T19:37:51.287378946Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:32.042099823Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:32.04211872Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:32.050974172Z DEBUG events [remediation] Remediation process - mark healthy remediated node as schedulable {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"82146"}, "reason": "MarkNodeSchedulable"} 2025-11-05T19:39:33.060519753Z INFO controllers.SelfNodeRemediation Remediating with OutOfServiceTaint Remediation strategy (auto-selected) {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:33.060537987Z INFO controllers.SelfNodeRemediation fencing completed, cleaning up {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:33.068524336Z INFO controllers.SelfNodeRemediation NoExecute taint removed {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}, "new taints": [{"key":"node.kubernetes.io/unreachable","effect":"NoExecute","timeAdded":"2025-11-05T19:37:28Z"}]} 2025-11-05T19:39:33.068670751Z DEBUG events [remediation] Remediation process - remove NoExecute taint from healthy remediated node {"type": "Normal", "object": {"kind":"Node","name":"node0","uid":"fd322311-b9e0-44be-bd4c-18d0fd23b361","apiVersion":"v1","resourceVersion":"82218"}, "reason": "RemoveNoExecuteTaint"} 2025-11-05T19:39:33.077644557Z INFO selfnoderemediation-resource validate update {"name": "node0-6ngtd"} 2025-11-05T19:39:33.094056806Z INFO controllers.SelfNodeRemediation finalizer removed {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:33.094266738Z DEBUG events [remediation] Remediation process - remove finalizer from snr {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"node0-6ngtd","uid":"6255e50e-0e85-47d8-beb6-846295543095","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"82143"}, "reason": "RemoveFinalizer"} 2025-11-05T19:39:33.094287146Z DEBUG events [remediation] Remediation finished {"type": "Normal", "object": {"kind":"SelfNodeRemediation","namespace":"openshift-machine-api","name":"node0-6ngtd","uid":"6255e50e-0e85-47d8-beb6-846295543095","apiVersion":"self-node-remediation.medik8s.io/v1alpha1","resourceVersion":"82143"}, "reason": "RemediationFinished"} 2025-11-05T19:39:33.105252932Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}} 2025-11-05T19:39:34.106132325Z INFO controllers.SelfNodeRemediation SNR already deleted {"pod": "manager", "selfnoderemediation": {"name":"node0-6ngtd","namespace":"openshift-machine-api"}}