Uploaded image for project: 'OpenShift Bugs'
  1. OpenShift Bugs
  2. OCPBUGS-52454

[AWS] Machine stuck in Provisioning and machine-controller log shows a panic when marketType is set to Spot

    • Moderate
    • None
    • False
    • Hide

      None

      Show
      None

      Description of problem:

          The machine is stuck in the Provisioning phase and the machine-controller log shows a nil pointer dereference panic when the MachineSet's providerSpec sets marketType: Spot.

      Version-Release number of selected component (if applicable):

          4.19.0-0.nightly-2025-03-05-160850 

      How reproducible:

          Always

      Steps to Reproduce:

          1. Create an AWS cluster.
      
      liuhuali@Lius-MacBook-Pro huali-test % oc get clusterversion
      NAME      VERSION                              AVAILABLE   PROGRESSING   SINCE   STATUS
      version   4.19.0-0.nightly-2025-03-05-160850   True        False         28m     Cluster version is 4.19.0-0.nightly-2025-03-05-160850 
      
          2. Copy a default MachineSet, set marketType: Spot in its providerSpec, then create it.
      
      liuhuali@Lius-MacBook-Pro huali-test % oc create -f ms1.yaml 
      machineset.machine.openshift.io/huliu-aws36a-6bslb-worker-us-east-2aa created
      
      liuhuali@Lius-MacBook-Pro huali-test % oc get machineset  huliu-aws36a-6bslb-worker-us-east-2aa -oyaml
      apiVersion: machine.openshift.io/v1beta1
      kind: MachineSet
      metadata:
        annotations:
          capacity.cluster-autoscaler.kubernetes.io/labels: kubernetes.io/arch=amd64
          machine.openshift.io/GPU: "0"
          machine.openshift.io/memoryMb: "16384"
          machine.openshift.io/vCPU: "4"
        creationTimestamp: "2025-03-06T03:46:29Z"
        generation: 1
        labels:
          machine.openshift.io/cluster-api-cluster: huliu-aws36a-6bslb
        name: huliu-aws36a-6bslb-worker-us-east-2aa
        namespace: openshift-machine-api
        resourceVersion: "55489"
        uid: fe33ee1d-384f-413e-b2a6-046c9d94dfc3
      spec:
        replicas: 1
        selector:
          matchLabels:
            machine.openshift.io/cluster-api-cluster: huliu-aws36a-6bslb
            machine.openshift.io/cluster-api-machineset: huliu-aws36a-6bslb-worker-us-east-2aa
        template:
          metadata:
            labels:
              machine.openshift.io/cluster-api-cluster: huliu-aws36a-6bslb
              machine.openshift.io/cluster-api-machine-role: worker
              machine.openshift.io/cluster-api-machine-type: worker
              machine.openshift.io/cluster-api-machineset: huliu-aws36a-6bslb-worker-us-east-2aa
          spec:
            lifecycleHooks: {}
            metadata: {}
            providerSpec:
              value:
                ami:
                  id: ami-0e763ecd8ccccbc99
                apiVersion: machine.openshift.io/v1beta1
                blockDevices:
                - ebs:
                    encrypted: true
                    iops: 0
                    kmsKey:
                      arn: ""
                    volumeSize: 120
                    volumeType: gp3
                capacityReservationId: ""
                credentialsSecret:
                  name: aws-cloud-credentials
                deviceIndex: 0
                iamInstanceProfile:
                  id: huliu-aws36a-6bslb-worker-profile
                instanceType: m6i.xlarge
                kind: AWSMachineProviderConfig
                marketType: Spot
                metadata:
                  creationTimestamp: null
                metadataServiceOptions: {}
                placement:
                  availabilityZone: us-east-2a
                  region: us-east-2
                securityGroups:
                - filters:
                  - name: tag:Name
                    values:
                    - huliu-aws36a-6bslb-node
                - filters:
                  - name: tag:Name
                    values:
                    - huliu-aws36a-6bslb-lb
                subnet:
                  filters:
                  - name: tag:Name
                    values:
                    - huliu-aws36a-6bslb-subnet-private-us-east-2a
                tags:
                - name: kubernetes.io/cluster/huliu-aws36a-6bslb
                  value: owned
                userDataSecret:
                  name: worker-user-data
      status:
        fullyLabeledReplicas: 1
        observedGeneration: 1
        replicas: 1
      liuhuali@Lius-MacBook-Pro huali-test % oc get machine
      NAME                                          PHASE          TYPE         REGION      ZONE         AGE
      huliu-aws36a-6bslb-master-0                   Running        m6i.xlarge   us-east-2   us-east-2a   123m
      huliu-aws36a-6bslb-master-1                   Running        m6i.xlarge   us-east-2   us-east-2b   123m
      huliu-aws36a-6bslb-master-2                   Running        m6i.xlarge   us-east-2   us-east-2c   123m
      huliu-aws36a-6bslb-worker-us-east-2a-p7jdn    Running        m6i.xlarge   us-east-2   us-east-2a   119m
      huliu-aws36a-6bslb-worker-us-east-2aa-kxvfx   Provisioning                                         5m10s
      huliu-aws36a-6bslb-worker-us-east-2b-wktd2    Running        m6i.xlarge   us-east-2   us-east-2b   119m
      huliu-aws36a-6bslb-worker-us-east-2c-5b5zs    Running        m6i.xlarge   us-east-2   us-east-2c   119m
      
      
      liuhuali@Lius-MacBook-Pro huali-test % oc logs machine-api-controllers-6b567f49c8-kpkfg  -c machine-controller
      ...
      E0306 03:46:30.434664       1 signal_unix.go:917] "msg"="Observed a panic" "error"=null "controller"="machine-controller" "name"="huliu-aws36a-6bslb-worker-us-east-2aa-kxvfx" "namespace"="openshift-machine-api" "object"={"name":"huliu-aws36a-6bslb-worker-us-east-2aa-kxvfx","namespace":"openshift-machine-api"} "panic"="runtime error: invalid memory address or nil pointer dereference" "panicGoValue"="\"invalid memory address or nil pointer dereference\"" "reconcileID"="ef6541ce-d66a-457c-93fb-6c8a32070c04" "stacktrace"="goroutine 181 [running]:\nk8s.io/apimachinery/pkg/util/runtime.logPanic({0x4357d00, 0xc00235f2f0}, {0x3910ae0, 0x5be1da0})\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/k8s.io/apimachinery/pkg/util/runtime/runtime.go:107 +0xbc\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile.func1()\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:107 +0x112\npanic({0x3910ae0?, 0x5be1da0?})\n\t/usr/lib/golang/src/runtime/panic.go:785 +0x132\ngithub.com/openshift/machine-api-provider-aws/pkg/actuators/machine.getInstanceMarketOptionsRequest(0xc0000af348)\n\t/go/src/github.com/openshift/machine-api-provider-aws/pkg/actuators/machine/instances.go:610 +0x1db\ngithub.com/openshift/machine-api-provider-aws/pkg/actuators/machine.launchInstance(0xc000c9a288, 0xc0000af348, {0xc000c88000, 0x6ce, 0x6ce}, {0x437e800, 0xc001494ff0}, {0x436bae0, 0xc00061b290}, 0xc0007f3d48)\n\t/go/src/github.com/openshift/machine-api-provider-aws/pkg/actuators/machine/instances.go:450 +0xe0a\ngithub.com/openshift/machine-api-provider-aws/pkg/actuators/machine.(*Reconciler).create(0xc000efb558)\n\t/go/src/github.com/openshift/machine-api-provider-aws/pkg/actuators/machine/reconciler.go:99 +0x81e\ngithub.com/openshift/machine-api-provider-aws/pkg/actuators/machine.(*Actuator).Create(0xc0008d7a90, {0x4357d00, 0xc00235f2f0}, 
0xc000c9a288)\n\t/go/src/github.com/openshift/machine-api-provider-aws/pkg/actuators/machine/actuator.go:94 +0x2b6\ngithub.com/openshift/machine-api-operator/pkg/controller/machine.(*ReconcileMachine).Reconcile(0xc0007697a0, {0x4357d00, 0xc00235f2f0}, {{{0xc001d47488?, 0x34f7f7a?}, {0xc000d27a40?, 0x0?}}})\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/github.com/openshift/machine-api-operator/pkg/controller/machine/controller.go:408 +0x1459\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Reconcile(0xc0000e8e80?, {0x4357d00?, 0xc00235f2f0?}, {{{0xc001d47488?, 0x0?}, {0xc000d27a40?, 0x0?}}})\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:118 +0xbf\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).reconcileHandler(0x437e740, {0x4357d38, 0xc00076f810}, {{{0xc001d47488, 0x15}, {0xc000d27a40, 0x2b}}})\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:328 +0x3a5\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).processNextWorkItem(0x437e740, {0x4357d38, 0xc00076f810})\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:288 +0x20e\nsigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2.2()\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:249 +0x85\ncreated by sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller[...]).Start.func2 in goroutine 112\n\t/go/src/github.com/openshift/machine-api-provider-aws/vendor/sigs.k8s.io/controller-runtime/pkg/internal/controller/controller.go:245 +0x6b8\n"
      E0306 03:46:30.434717       1 controller.go:341] "msg"="Reconciler error" "error"="panic: runtime error: invalid memory address or nil pointer dereference [recovered]" "controller"="machine-controller" "name"="huliu-aws36a-6bslb-worker-us-east-2aa-kxvfx" "namespace"="openshift-machine-api" "object"={"name":"huliu-aws36a-6bslb-worker-us-east-2aa-kxvfx","namespace":"openshift-machine-api"} "reconcileID"="ef6541ce-d66a-457c-93fb-6c8a32070c04"
      
      
          3.
          

      Actual results:

          The machine is stuck in the Provisioning phase and the machine-controller log shows a panic.

      Expected results:

          The machine should reach the Running phase.

      Additional info:

          New feature testing for https://issues.redhat.com/browse/OCPCLOUD-2780

              athiruma@redhat.com Thirumalesh Aaraveti
              huliu@redhat.com Huali Liu
              Huali Liu Huali Liu
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

                Created:
                Updated: