Uploaded image for project: 'Red Hat Advanced Cluster Management'
  1. Red Hat Advanced Cluster Management
  2. ACM-13594

The maestro-agent panics in the CS scenario

XMLWordPrintable

    • Icon: Bug Bug
    • Resolution: Done
    • Icon: Critical Critical
    • None
    • None
    • Maestro
    • 2
    • False
    • None
    • False
    • ACM Maestro Train-18
    • Moderate
    • None

      Description of problem:

      The maestro-agent shows the following errors:
      E0823 00:29:21.025371 1 agentclient.go:187] failed to decode spec, manifest is invalid, Object 'Kind' is missing in 'null
      ...
      E0823 00:34:50.785787 1 manifestwork_controller.go:226] Reconcile work 69fbbdc8-f9c3-5881-983e-45915e228091 fails with err: Internal error occurred: failed calling webhook "manifestworkvalidators.admission.work.open-cluster-management.io": failed to call webhook: Post "https://cluster-manager-work-webhook.open-cluster-management-hub.svc:9443/validate-work-open-cluster-management-io-v1-manifestwork?timeout=10s": no endpoints available for service "cluster-manager-work-webhook"
      ....
      E0824 04:02:39.713622 1 runtime.go:79] Observed a panic: "invalid memory address or nil pointer dereference" (runtime error: invalid memory address or nil pointer dereference)
      goroutine 443 [running]:
      k8s.io/apimachinery/pkg/util/runtime.logPanic({0x2f93280, 0x5af63f0})
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/runtime/runtime.go:75 +0x85
      k8s.io/apimachinery/pkg/util/runtime.HandleCrash({0xc00111c8f8, 0x1, 0xc000fc6e00?})
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/runtime/runtime.go:49 +0x6b
      panic({0x2f93280?, 0x5af63f0?})
      /usr/lib/golang/src/runtime/panic.go:770 +0x132
      bytes.(*Buffer).WriteString(0x520001?, {0x0?, 0x1?})
      /usr/lib/golang/src/bytes/buffer.go:193 +0xa8
      github.com/eclipse/paho.golang/packets.writeString({0x0, 0x1}, 0xc00111cab8)
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/packets/packets.go:389 +0x3c
      github.com/eclipse/paho.golang/packets.(*Properties).Pack(0xc0004037c0, 0x3)
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/packets/properties.go:388 +0x865
      github.com/eclipse/paho.golang/packets.(*Publish).Buffers(0xc000df3080)
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/packets/publish.go:60 +0x7b
      github.com/eclipse/paho.golang/packets.(*ControlPacket).WriteTo(0xc00111cc60, {0x739df038d828, 0xc0003a4240})
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/packets/packets.go:281 +0x7c
      github.com/eclipse/paho.golang/packets.(*Publish).WriteTo(0xc000c71d10?, {0x739df038d828?, 0xc0003a4240?})
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/packets/publish.go:79 +0x7a
      github.com/eclipse/paho.golang/paho.(*Client).publishQoS12(0xc00039d040, {0x3b0c608, 0xc000801890}, 0xc000df3080)
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/paho/client.go:821 +0x3d6
      github.com/eclipse/paho.golang/paho.(*Client).Publish(0xc00039d040, {0x3b0c608, 0xc000801890}, 0xc00052e800)
      /go/pkg/mod/github.com/eclipse/paho.golang@v0.11.0/paho/client.go:798 +0x28b
      github.com/cloudevents/sdk-go/protocol/mqtt_paho/v2.(*Protocol).Send(0xc000caee40, {0x3b0c608, 0xc000801890}, {0x3b0c870, 0xc000df3040}, {0x0, 0x0, 0x0})
      /go/pkg/mod/github.com/cloudevents/sdk-go/protocol/mqtt_paho/v2@v2.0.0-20231030012137-0836a524e995/protocol.go:103 +0x1e5
      github.com/cloudevents/sdk-go/v2/client.(*ceClient).Send(0xc000d348f0, {0x3b0c608, 0xc000801890}, {{0x3b3bcf0, 0xc000f2c0e0}, {0xc00116a800, 0x37e0, 0x3800}, 0x0, 0x0})
      /go/pkg/mod/github.com/cloudevents/sdk-go/v2@v2.15.3-0.20240329120647-e6a74efbacbf/client/client.go:135 +0x2e2
      open-cluster-management.io/sdk-go/pkg/cloudevents/generic.(*baseClient).publish(0xc0003ab030, {0x3b0c640, 0xc0008ae320}, {{0x3b3bcf0, 0xc000f2c0e0}, {0xc00116a800, 0x37e0, 0x3800}, 0x0, 0x0})
      /go/pkg/mod/open-cluster-management.io/sdk-go@v0.14.1-0.20240628095929-9ffb1b19e566/pkg/cloudevents/generic/baseclient.go:133 +0x379
      open-cluster-management.io/sdk-go/pkg/cloudevents/generic.(*CloudEventAgentClient[...]).Publish(0x3704, {0x3b0c640?, 0xc0008ae320}, {0xc0018cef60, 0x20}, {0xc00112dc21, 0x8}, {0xc00112dc2a, 0xf, {0x35536d8, ...}, ...}, ...)
      /go/pkg/mod/open-cluster-management.io/sdk-go@v0.14.1-0.20240628095929-9ffb1b19e566/pkg/cloudevents/generic/agentclient.go:136 +0x1a8
      open-cluster-management.io/sdk-go/pkg/cloudevents/work/agent/client.(*ManifestWorkAgentClient).Patch(0xc000e6a5a0, {0x3b0c640, 0xc0008ae320}, {0xc00145c9c0, 0x24}, {0x3590067, 0x1c}, {0xc001e03800, 0x3704, 0x3800}, ...)
      /go/pkg/mod/open-cluster-management.io/sdk-go@v0.14.1-0.20240628095929-9ffb1b19e566/pkg/cloudevents/work/agent/client/manifestwork.go:127 +0x5de
      open-cluster-management.io/sdk-go/pkg/patcher.(*patcher[...]).patch(0x3b41300, {0x3b0c640, 0xc0008ae320}, 0xc001525080, 0xc000b54580, 0xc000b542c0, {0xc00183d740, 0x1, 0x1})
      /go/pkg/mod/open-cluster-management.io/sdk-go@v0.14.1-0.20240628095929-9ffb1b19e566/pkg/patcher/patcher.go:204 +0x4db
      open-cluster-management.io/sdk-go/pkg/patcher.(*patcher[...]).PatchStatus(0x3b41300, {0x3b0c640, 0xc0008ae320}, 0xc001525080, {{0xc00013ee40, 0x2, 0x2}, {{0xc00082caa0, 0x1, 0x1}}}, ...)
      /go/pkg/mod/open-cluster-management.io/sdk-go@v0.14.1-0.20240628095929-9ffb1b19e566/pkg/patcher/patcher.go:224 +0x1f5
      open-cluster-management.io/ocm/pkg/work/spoke/controllers/statuscontroller.(*AvailableStatusController).syncManifestWork(0xc000caf4c0, {0x3b0c640, 0xc0008ae320}, 0xc001db5980)
      /go/pkg/mod/open-cluster-management.io/ocm@v0.13.1-0.20240618054845-e2a7b9e78b33/pkg/work/spoke/controllers/statuscontroller/availablestatus_controller.go:144 +0x85b
      open-cluster-management.io/ocm/pkg/work/spoke/controllers/statuscontroller.(*AvailableStatusController).sync(0xc000caf4c0, {0x3b0c640, 0xc0008ae320}, {0x3af6ea0, 0xc000fda2d0})
      /go/pkg/mod/open-cluster-management.io/ocm@v0.13.1-0.20240618054845-e2a7b9e78b33/pkg/work/spoke/controllers/statuscontroller/availablestatus_controller.go:82 +0x17e
      github.com/openshift/library-go/pkg/controller/factory.(*baseController).reconcile(0xc000fcab40, {0x3b0c640, 0xc0008ae320}, {0x3af6ea0?, 0xc000fda2d0?})
      /go/pkg/mod/github.com/openshift/library-go@v0.0.0-20240621150525-4bb4238aef81/pkg/controller/factory/base_controller.go:201 +0x43
      github.com/openshift/library-go/pkg/controller/factory.(*baseController).processNextWorkItem(0xc000fcab40, {0x3b0c640, 0xc0008ae320})
      /go/pkg/mod/github.com/openshift/library-go@v0.0.0-20240621150525-4bb4238aef81/pkg/controller/factory/base_controller.go:260 +0x1ae
      github.com/openshift/library-go/pkg/controller/factory.(*baseController).runWorker.func1({0x3b0c640, 0xc0008ae320})
      /go/pkg/mod/github.com/openshift/library-go@v0.0.0-20240621150525-4bb4238aef81/pkg/controller/factory/base_controller.go:192 +0x89
      k8s.io/apimachinery/pkg/util/wait.JitterUntilWithContext.func1()
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/wait/backoff.go:259 +0x1f
      k8s.io/apimachinery/pkg/util/wait.BackoffUntil.func1(0x30?)
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/wait/backoff.go:226 +0x33
      k8s.io/apimachinery/pkg/util/wait.BackoffUntil(0xc00111df10, {0x3ad8fc0, 0xc000d536b0}, 0x1, 0xc000d76f60)
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/wait/backoff.go:227 +0xaf
      k8s.io/apimachinery/pkg/util/wait.JitterUntil(0xc0013db710, 0x3b9aca00, 0x0, 0x1, 0xc000d76f60)
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/wait/backoff.go:204 +0x7f
      k8s.io/apimachinery/pkg/util/wait.JitterUntilWithContext({0x3b0c640, 0xc0008ae320}, 0xc0013db770, 0x3b9aca00, 0x0, 0x1)
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/wait/backoff.go:259 +0x93
      k8s.io/apimachinery/pkg/util/wait.UntilWithContext(...)
      /go/pkg/mod/k8s.io/apimachinery@v0.30.2/pkg/util/wait/backoff.go:170
      github.com/openshift/library-go/pkg/controller/factory.(*baseController).runWorker(0x0?, {0x3b0c640?, 0xc0008ae320?})
      /go/pkg/mod/github.com/openshift/library-go@v0.0.0-20240621150525-4bb4238aef81/pkg/controller/factory/base_controller.go:183 +0x4d
      github.com/openshift/library-go/pkg/controller/factory.(*baseController).Run.func2()
      /go/pkg/mod/github.com/openshift/library-go@v0.0.0-20240621150525-4bb4238aef81/pkg/controller/factory/base_controller.go:117 +0x65
      created by github.com/openshift/library-go/pkg/controller/factory.(*baseController).Run in goroutine 391
      /go/pkg/mod/github.com/openshift/library-go@v0.0.0-20240621150525-4bb4238aef81/pkg/controller/factory/base_controller.go:112 +0x2c9

      Version-Release number of selected component (if applicable):

       

      quay.io/redhat-user-workloads/maestro-rhtap-tenant/maestro/maestro:6583de322fb4518a32f6ab21c0c1f6a144ea65cd

      How reproducible:

      Steps to Reproduce:

      1. Start 10 goroutines.
      2. In each goroutine, use sdk-go to publish cloud events repeatedly at 1-second intervals.

      Actual results:

      After about ten minutes, the sdk-go client panics.

      Expected results:

       

      No panics occur, even when cloud events are published frequently.

      Additional info:

              wliu1 Wei Liu
              wliu1 Wei Liu
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

                Created:
                Updated:
                Resolved: