Uploaded image for project: 'OpenShift Bugs'
  1. OpenShift Bugs
  2. OCPBUGS-29101

Manually deploying NFD 4.15 on bare metal (BM) fails with errors

XMLWordPrintable

    • No
    • Rejected
    • False
    • Hide

      None

      Show
      None

      Description of problem:

          Deploying NFD 4.15 from source code fails with the errors below:
      [root@infra cluster-nfd-operator]#  oc -n openshift-nfd logs nfd-worker-kswtg 
      I0206 09:44:05.749892       1 main.go:51] "version not set! Set -ldflags \"-X github.com/openshift/node-feature-discovery/pkg/version.version=`git describe --tags --dirty --always`\" during build or run."
      I0206 09:44:05.749979       1 main.go:66] "-server is deprecated, will be removed in a future release along with the deprecated gRPC API"
      I0206 09:44:05.750048       1 nfd-worker.go:220] "Node Feature Discovery Worker" version="undefined" nodeName="openshift-qe-017.lab.eng.rdu2.redhat.com" namespace="openshift-nfd"
      I0206 09:44:05.750425       1 nfd-worker.go:512] "configuration file parsed" path="/etc/kubernetes/node-feature-discovery/nfd-worker.conf"
      I0206 09:44:05.750617       1 nfd-worker.go:544] "configuration successfully updated" configuration={"Core":{"Klog":{},"LabelWhiteList":{},"NoPublish":false,"FeatureSources":["all"],"Sources":null,"LabelSources":["all"],"SleepInterval":{"Duration":60000000000}},"Sources":{"cpu":{"cpuid":{"attributeBlacklist":["BMI1","BMI2","CLMUL","CMOV","CX16","ERMS","F16C","HTT","LZCNT","MMX","MMXEXT","NX","POPCNT","RDRAND","RDSEED","RDTSCP","SGX","SGXLC","SSE","SSE2","SSE3","SSE4","SSE42","SSSE3","TDX_GUEST"]}},"custom":[],"fake":{"labels":{"fakefeature1":"true","fakefeature2":"true","fakefeature3":"true"},"flagFeatures":["flag_1","flag_2","flag_3"],"attributeFeatures":{"attr_1":"true","attr_2":"false","attr_3":"10"},"instanceFeatures":[{"attr_1":"true","attr_2":"false","attr_3":"10","attr_4":"foobar","name":"instance_1"},{"attr_1":"true","attr_2":"true","attr_3":"100","name":"instance_2"},{"name":"instance_3"}]},"kernel":{"KconfigFile":"","configOpts":["NO_HZ","NO_HZ_IDLE","NO_HZ_FULL","PREEMPT"]},"local":{},"pci":{"deviceClassWhitelist":["0200","03","12"],"deviceLabelFields":["vendor"]},"usb":{"deviceClassWhitelist":["0e","ef","fe","ff"],"deviceLabelFields":["class","vendor","device"]}}}
      I0206 09:44:05.750806       1 metrics.go:44] "metrics server starting" port=":8081"
      E0206 09:44:05.756889       1 kernel.go:149] "failed to get builtin kernel modules" err="failed to read file /host-lib/modules/5.14.0-284.50.1.el9_2.x86_64/modules.builtin: open /host-lib/modules/5.14.0-284.50.1.el9_2.x86_64/modules.builtin: no such file or directory"
      I0206 09:44:05.797093       1 nfd-worker.go:554] "starting feature discovery..."
      I0206 09:44:05.797748       1 nfd-worker.go:569] "feature discovery completed"
      I0206 09:44:05.806586       1 nfd-worker.go:690] "creating NodeFeature object" nodefeature=""
      I0206 09:44:05.811329       1 metrics.go:51] "stopping metrics server" port=":8081"
      I0206 09:44:05.811412       1 metrics.go:45] "metrics server stopped" exitCode="http: Server closed"
      E0206 09:44:05.811438       1 main.go:83] "error while running" err="failed to advertise features (via CRD API): failed to create NodeFeature object \"openshift-qe-017.lab.eng.rdu2.redhat.com\": the server could not find the requested resource (post nodefeatures.nfd.openshift.io)"
      [root@infra cluster-nfd-operator]# 
      
      E0206 10:08:41.803645       1 nfd-master.go:402] "failed to update nodes" err="nodes is forbidden: User \"system:serviceaccount:openshift-nfd:nfd-master\" cannot list resource \"nodes\" in API group \"\" at the cluster scope"
      I0206 10:08:42.803752       1 nfd-master.go:708] "will process all nodes in the cluster"
      E0206 10:08:42.804995       1 nfd-master.go:402] "failed to update nodes" err="nodes is forbidden: User \"system:serviceaccount:openshift-nfd:nfd-master\" cannot list resource \"nodes\" in API group \"\" at the cluster scope"
      I0206 10:08:43.805378       1 nfd-master.go:708] "will process all nodes in the cluster"
      E0206 10:08:43.806685       1 nfd-master.go:402] "failed to update nodes" err="nodes is forbidden: User \"system:serviceaccount:openshift-nfd:nfd-master\" cannot list resource \"nodes\" in API group \"\" at the cluster scope"
      I0206 10:08:44.806826       1 nfd-master.go:708] "will process all nodes in the cluster"
      E0206 10:08:44.808097       1 nfd-master.go:402] "failed to update nodes" err="nodes is forbidden: User \"system:serviceaccount:openshift-nfd:nfd-master\" cannot list resource \"nodes\" in API group \"\" at the cluster scope"

      Version-Release number of selected component (if applicable):

          

      How reproducible:

          

      Steps to Reproduce:

       git clone -b release-4.15 https://github.com/openshift/cluster-nfd-operator.git
      cd cluster-nfd-operator/
      export REGISTRY_AUTH_FILE=~/pull-secret.json
      export IMAGE_REGISTRY=quay.io/openshift-psap-qe
      export IMAGE_PUSH_CMD='podman push'
      export IMAGE_BUILD_CMD='podman build'
      make image
      
      IMAGE_TAG=quay.io/openshift-psap-qe/cluster-nfd-operator:4.15.0 make deploy
      oc apply -f config/samples/nfd.openshift.io_v1_nodefeaturediscovery.yaml     

      Actual results:

          The nfd-worker-xxx pods go into CrashLoopBackOff.

      Expected results:

          The NFD master and worker pods deploy successfully.

      Additional info:

          

              yshnaidm Yevgeny Shnaidman
              rhn-support-liqcui Liquan Cui
              Guy Gordani Guy Gordani
              Votes:
              0 Vote for this issue
              Watchers:
              6 Start watching this issue

                Created:
                Updated:
                Resolved: