Uploaded image for project: 'Red Hat Advanced Cluster Management'
  1. Red Hat Advanced Cluster Management
  2. ACM-14672

The BMH template should be updated with "externally provisioned" to avoid an extra reboot

XMLWordPrintable

    • Moderate
    • None

      advanced-cluster-management.v2.12.0-92
      multicluster-engine.v2.7.0

      used IBIO with siteconfig

      siteconfig logs:

      oc logs -n rhacm siteconfig-controller-manager-b94fb5855-jcwqf -c manager
      2024-10-02T22:29:08Z	INFO	setup	created default reference template ConfigMap rhacm/ai-cluster-templates-v1
      2024-10-02T22:29:08Z	INFO	setup	created default reference template ConfigMap rhacm/ai-node-templates-v1
      2024-10-02T22:29:08Z	INFO	setup	created default reference template ConfigMap rhacm/ibi-cluster-templates-v1
      2024-10-02T22:29:08Z	INFO	setup	created default reference template ConfigMap rhacm/ibi-node-templates-v1
      2024-10-02T22:29:08Z	INFO	setup	starting manager
      2024-10-02T22:29:08Z	INFO	controller-runtime.metrics	Starting metrics server
      2024-10-02T22:29:08Z	INFO	starting server	{"kind": "health probe", "addr": "[::]:8081"}
      2024-10-02T22:29:08Z	INFO	controller-runtime.metrics	Serving metrics server	{"bindAddress": "127.0.0.1:8080", "secure": false}
      I1002 22:29:08.080385       1 leaderelection.go:250] attempting to acquire leader lease rhacm/manager.siteconfig.open-cluster-management.io...
      I1002 22:29:23.750847       1 leaderelection.go:260] successfully acquired lease rhacm/manager.siteconfig.open-cluster-management.io
      2024-10-02T22:29:23Z	DEBUG	events	siteconfig-controller-manager-b94fb5855-jcwqf_71a7c4ca-b9ef-4cd4-a9c4-7da60db117b1 became leader	{"type": "Normal", "object": {"kind":"Lease","namespace":"rhacm","name":"manager.siteconfig.open-cluster-management.io","uid":"641286e9-2e71-45bb-b753-964c78221782","apiVersion":"coordination.k8s.io/v1","resourceVersion":"803931"}, "reason": "LeaderElection"}
      2024-10-02T22:29:23Z	INFO	Starting EventSource	{"controller": "clusterDeploymentReconciler", "controllerGroup": "hive.openshift.io", "controllerKind": "ClusterDeployment", "source": "kind source: *v1.ClusterDeployment"}
      2024-10-02T22:29:23Z	INFO	Starting EventSource	{"controller": "clusterDeploymentReconciler", "controllerGroup": "hive.openshift.io", "controllerKind": "ClusterDeployment", "source": "kind source: *v1alpha1.ClusterInstance"}
      2024-10-02T22:29:23Z	INFO	Starting Controller	{"controller": "clusterDeploymentReconciler", "controllerGroup": "hive.openshift.io", "controllerKind": "ClusterDeployment"}
      2024-10-02T22:29:23Z	INFO	Starting EventSource	{"controller": "clusterinstance", "controllerGroup": "siteconfig.open-cluster-management.io", "controllerKind": "ClusterInstance", "source": "kind source: *v1alpha1.ClusterInstance"}
      2024-10-02T22:29:23Z	INFO	Starting Controller	{"controller": "clusterinstance", "controllerGroup": "siteconfig.open-cluster-management.io", "controllerKind": "ClusterInstance"}
      2024-10-02T22:29:23Z	INFO	Starting workers	{"controller": "clusterinstance", "controllerGroup": "siteconfig.open-cluster-management.io", "controllerKind": "ClusterInstance", "worker count": 1}
      2024-10-02T22:29:23Z	INFO	Starting workers	{"controller": "clusterDeploymentReconciler", "controllerGroup": "hive.openshift.io", "controllerKind": "ClusterDeployment", "worker count": 1}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Start reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Loaded ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}, "version": "1393277"}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Finished reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Start reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Loaded ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}, "version": "1393278"}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Starting validation	{"ClusterInstance": "elvis2"}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Validation succeeded	{"ClusterInstance": "elvis2"}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Finished validation	{"ClusterInstance": "elvis2"}
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Rendering templates for ClusterInstance elvis2
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Rendering templates for ClusterInstance elvis2
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance.TemplateEngine	Processing cluster-level templates for ClusterInstance elvis2
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance.TemplateEngine	renderTemplates: processing templateRef 1 of 1
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance.TemplateEngine	Processed cluster-level templates for ClusterInstance elvis2
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance.TemplateEngine	Processing node-level templates for ClusterInstance elvis2 [node: 1 of 1]
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance.TemplateEngine	renderTemplates: processing templateRef 1 of 1
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance.TemplateEngine	Processed node-level templates for ClusterInstance elvis2 [node: 1 of 1]
      2024-10-03T15:41:15Z	INFO	controllers.ClusterInstance	Validating rendered manifests for ClusterInstance elvis2
      2024-10-03T15:41:16Z	INFO	KubeAPIWarningLogger	unknown field "machineNetwork"
      2024-10-03T15:41:16Z	INFO	controllers.ClusterInstance	Applying rendered manifests for ClusterInstance elvis2
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Initializing Provisioned condition	{"ClusterInstance": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Failed to extract condition(s)	{"name": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Failed to extract condition(s)	{"name": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Failed to extract condition(s)	{"name": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Failed to extract condition(s)	{"name": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Failed to extract condition(s)	{"name": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterDeploymentReconciler	Failed to extract condition(s)	{"name": "elvis2"}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterInstance	ClusterInstance templates are rendered	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterInstance	Updating ObservedGeneration to 1	{"ClusterInstance": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T15:41:16Z	INFO	controllers.ClusterInstance	Finished reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Start reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Loaded ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}, "version": "1405553"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully deleted resource	{"KlusterletAddonConfig": "elvis2"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully deleted resource	{"ManagedCluster": "elvis2"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully deleted resource	{"BareMetalHost": "elvis2.example.redhat.com"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully deleted resource	{"ClusterDeployment": "elvis2"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully deleted resource	{"ImageClusterInstall": "elvis2"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully deleted resource	{"Secret": "elvis2.example.redhat.com"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Successfully finalized ClusterInstance	{"name": "elvis2"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Removing ClusterInstance finalizer	{"name": "elvis2"}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Finished reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T16:02:07Z	ERROR	Reconciler error	{"controller": "clusterDeploymentReconciler", "controllerGroup": "hive.openshift.io", "controllerKind": "ClusterDeployment", "ClusterDeployment": {"name":"elvis2","namespace":"elvis2"}, "namespace": "elvis2", "name": "elvis2", "reconcileID": "67f13231-79b7-458c-9851-a232b7c127fb", "error": "failed to update ClusterInstance status: clusterinstances.siteconfig.open-cluster-management.io \"elvis2\" not found"}
      sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).reconcileHandler
      	sigs.k8s.io/controller-runtime@v0.16.2/pkg/internal/controller/controller.go:329
      sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).processNextWorkItem
      	sigs.k8s.io/controller-runtime@v0.16.2/pkg/internal/controller/controller.go:266
      sigs.k8s.io/controller-runtime/pkg/internal/controller.(*Controller).Start.func2.2
      	sigs.k8s.io/controller-runtime@v0.16.2/pkg/internal/controller/controller.go:227
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Start reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	ClusterInstance not found	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterInstance	Finished reconciling ClusterInstance	{"name": {"name":"elvis2","namespace":"elvis2"}}
      2024-10-03T16:02:07Z	INFO	controllers.ClusterDeploymentReconciler	ClusterInstance not found	{"name": "elvis2"}
      

      It seems that the BMH is not marked as "externally provisioned", which causes inspection to start even though it is explicitly disabled in the ClusterInstance spec:

      # ClusterInstance manifest used to reproduce the issue: one node, rendered
      # with the ibi-* cluster and node templates from the rhacm namespace.
      apiVersion: siteconfig.open-cluster-management.io/v1alpha1
      kind: ClusterInstance
      metadata:
        name: elvis2
        namespace: elvis2
      spec:
        additionalNTPSources:
          - clock.redhat.com
          - clock2.redhat.com
        baseDomain: qe.lab.redhat.com
        clusterImageSetNameRef: "4.17"
        clusterLabels:
          common: "true"
          sites: elvis2
        clusterName: elvis2
        clusterNetwork:
          - cidr: 10.128.0.0/14
            hostPrefix: 23
        cpuPartitioningMode: None
        extraAnnotations:
          ClusterDeployment:
            myTestAnnotation: success
        holdInstallation: false
        installConfigOverrides: '{"capabilities":{"baselineCapabilitySet": "None", "additionalEnabledCapabilities": [ "marketplace", "OperatorLifecycleManager", "Console", "NodeTuning", "Ingress", "ImageRegistry" ] }}'
        extraManifestsRefs:
        - name: "elvis2-extras-cm0"
        # NOTE(review): the controller log above shows a KubeAPIWarningLogger
        # warning 'unknown field "machineNetwork"' while validating the rendered
        # manifests; presumably it originates from this value being rendered
        # into a resource that does not accept it. TODO confirm.
        machineNetwork:
          - cidr: 192.168.123.0/24
        networkType: OVNKubernetes
        # Single node definition; the rendered BareMetalHost is named after hostName
        # (the log shows BareMetalHost "elvis2.example.redhat.com").
        nodes:
          - automatedCleaningMode: disabled
            bmcAddress: "redfish-virtualmedia+https://192.168.123.1:8000/redfish/v1/Systems/4a50b4a0-02c1-4cc5-ba80-8741b43583d1"
            bmcCredentialsName:
              name: bmc-secret1
            bootMACAddress: "52:54:00:f7:d4:d1"
            bootMode: UEFI
            rootDeviceHints:
              deviceName: "/dev/sda"
            hostName: elvis2.example.redhat.com
            # Inspection is explicitly disabled here; the report states that
            # inspection starts anyway.
            ironicInspect: "disabled"
            nodeNetwork:
              # Static network configuration for the node's single interface.
              config:
                interfaces:
                  - name: enp5s0
                    type: ethernet
                    state: up
                    mac-address: "52:54:00:f7:d4:d1"
                    ipv4:
                      enabled: true
                      address:
                        - ip: 192.168.123.142
                          prefix-length: 24
                      dhcp: false
                    ipv6:
                      enabled: false
                dns-resolver:
                  config:
                    server:
                      - 192.168.123.1
                routes:
                  config:
                    - destination: 0.0.0.0/0
                      next-hop-address: 192.168.123.1
                      next-hop-interface: enp5s0
                      table-id: 254
              # Interface-name-to-MAC mapping; matches bootMACAddress above.
              interfaces:
              - name: "enp5s0"
                macAddress: "52:54:00:f7:d4:d1"
            role: master
            # Node-level templates: the default ibi-node-templates-v1 ConfigMap
            # that the controller log shows being created in the rhacm namespace.
            templateRefs:
              - name: ibi-node-templates-v1
                namespace: rhacm
        pullSecretRef:
          name: pull-secret
        serviceNetwork:
          - cidr: 172.30.0.0/16
        sshPublicKey: 'ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABgQDjEQXbTauuwDKVeGyveSAIU77h1iZE5iMa8WzGSx2m+6khNqBMBih88Y4OShhsL0lT1pRPhvW8vyFbotkkCe5VNzvfHnUE8/PrRyIZnXv91oXjITHN2URixwEhqJESUCbizXCGfzN5LpJzQL8PCJTiuObIFqCYIke082/uGfx8u15gtJbSvt9/v6NbicOvVZya8fKKTs2ZAomAj2G33RW7PRRD3DV9kRl9sR8CLsdhX1SuNsmyksIXF/8LdGZNtqAft6pVcGE/iJE20f3rfqFxO7folUur0IQESciLHbCAtaAA9yTrBo20HvxP+PMQQUEFrv7isSUqBBtUroMRBWeiZIA7WEfH3aWIXM5rpMWBqlBFLi0jqNy4P36ynpQ58nm/7C+SF+j762nr1tuFANzuX4LZKLAfgqaMVQHbERsbrOSLlN42w3LJHG6s3fvX13RDI5NbUh8139bIpqfHMueC/q04CcrUiQZLLT15MxJ0CddmIzC2/55B7ZCp/5bc8H8= root@sealusa34.mobius.lab.eng.rdu2.redhat.com'
        # Cluster-level templates: the default ibi-cluster-templates-v1 ConfigMap
        # that the controller log shows being created in the rhacm namespace.
        templateRefs:
          - name: ibi-cluster-templates-v1
            namespace: rhacm
      
      

        1. must-gather.tgz
          68.03 MB
          Alexander Chuzhoy

              sakhoury@redhat.com Sharat Akhoury
              achuzhoy@redhat.com Alexander Chuzhoy
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

                Created:
                Updated:
                Resolved: