Uploaded image for project: 'OpenShift Bugs'
  1. OpenShift Bugs
  2. OCPBUGS-49741

[Regression] Failed to get pod annotation: timed out waiting for annotations when creating a non-UDN pod with secondary+layer2

XMLWordPrintable

    • Critical
    • Yes
    • Rejected
    • False
    • Hide

      None

      Show
      None

      Description of problem:

      When creating a non-UDN pod with a secondary layer2 network, the pod gets stuck in ContainerCreating with the error "failed to get pod annotation: timed out waiting for annotations".
      The same test case passes in 4.16 and 4.17 but now fails in 4.18 and 4.19.

      Version-Release number of selected component (if applicable):

      4.18 and 4.19

      How reproducible:

      Always

      Steps to Reproduce:

       

      $ cat nad.yaml
      apiVersion: k8s.cni.cncf.io/v1
      kind: NetworkAttachmentDefinition
      metadata:
        name: layer2ipv4network
      spec:
        config: |2
          {
                  "cniVersion": "0.3.1",
                  "name": "layer2ipv4network",
                  "type": "ovn-k8s-cni-overlay",
                  "topology":"layer2",
                  "subnets": "192.168.100.0/24",
                  "mtu": 1300,
                  "netAttachDefName": "test/layer2ipv4network"
          }
      $ cat pod.yaml
      kind: Pod
      apiVersion: v1
      metadata:
        name: multihoming-ipv4-pod1
        labels:
          name: multihoming-ipv4-pod1
        annotations:
          k8s.v1.cni.cncf.io/networks: layer2ipv4network
      spec:
        securityContext:
          runAsNonRoot: true
          seccompProfile:
            type: RuntimeDefault
        containers:
        - name: multihoming-ipv4-pod1
          image: quay.io/openshifttest/hello-sdn@sha256:c89445416459e7adea9a5a416b3365ed3d74f2491beb904d61dc8d1eb89a72a4
          securityContext:
            allowPrivilegeEscalation: false
            capabilities:
              drop: ["ALL"]
      # Pass in 4.16
      $ oc get clusterversion
      NAME      VERSION                              AVAILABLE   PROGRESSING   SINCE   STATUS
      version   4.16.0-0.nightly-2025-01-31-045344   True        False         4h12m   Cluster version is 4.16.0-0.nightly-2025-01-31-045344
      $ oc create -f nad.yaml
      networkattachmentdefinition.k8s.cni.cncf.io/layer2ipv4network created
      $ oc create -f pod.yaml 
      pod/multihoming-ipv4-pod1 created
      $ oc get pod
      NAME                    READY   STATUS    RESTARTS   AGE
      multihoming-ipv4-pod1   1/1     Running   0          5s
      
      # Fail in 4.19
      $ oc get clusterversion
      NAME      VERSION                              AVAILABLE   PROGRESSING   SINCE   STATUS
      version   4.19.0-0.nightly-2025-01-30-091858   True        False         3h17m   Cluster version is 4.19.0-0.nightly-2025-01-30-091858
      $ oc create -f nad.yaml
      networkattachmentdefinition.k8s.cni.cncf.io/layer2ipv4network created
      $ oc create -f pod.yaml 
      pod/multihoming-ipv4-pod1 created
      $ oc get pod
      NAME                    READY   STATUS              RESTARTS   AGE
      multihoming-ipv4-pod1   0/1     ContainerCreating   0          2m36s
      $ oc describe pod multihoming-ipv4-pod1
      Name:             multihoming-ipv4-pod1
      Namespace:        test
      Priority:         0
      Service Account:  default
      Node:             ip-10-0-59-120.us-east-2.compute.internal/10.0.59.120
      Start Time:       Fri, 31 Jan 2025 15:05:19 -0500
      Labels:           name=multihoming-ipv4-pod1
      Annotations:      k8s.ovn.org/pod-networks:
                          {"default":{"ip_addresses":["10.131.0.51/23"],"mac_address":"0a:58:0a:83:00:33","gateway_ips":["10.131.0.1"],"routes":[{"dest":"10.128.0.0...
                        k8s.v1.cni.cncf.io/networks: layer2ipv4network
                        openshift.io/scc: restricted-v2
                        seccomp.security.alpha.kubernetes.io/pod: runtime/default
      Status:           Pending
      SeccompProfile:   RuntimeDefault
      IP:               
      IPs:              <none>
      Containers:
        multihoming-ipv4-pod1:
          Container ID:   
          Image:          quay.io/openshifttest/hello-sdn@sha256:c89445416459e7adea9a5a416b3365ed3d74f2491beb904d61dc8d1eb89a72a4
          Image ID:       
          Port:           <none>
          Host Port:      <none>
          State:          Waiting
            Reason:       ContainerCreating
          Ready:          False
          Restart Count:  0
          Environment:    <none>
          Mounts:
            /var/run/secrets/kubernetes.io/serviceaccount from kube-api-access-lvcmb (ro)
      Conditions:
        Type                        Status
        PodReadyToStartContainers   False 
        Initialized                 True 
        Ready                       False 
        ContainersReady             False 
        PodScheduled                True 
      Volumes:
        kube-api-access-lvcmb:
          Type:                    Projected (a volume that contains injected data from multiple sources)
          TokenExpirationSeconds:  3607
          ConfigMapName:           kube-root-ca.crt
          ConfigMapOptional:       <nil>
          DownwardAPI:             true
          ConfigMapName:           openshift-service-ca.crt
          ConfigMapOptional:       <nil>
      QoS Class:                   BestEffort
      Node-Selectors:              <none>
      Tolerations:                 node.kubernetes.io/not-ready:NoExecute op=Exists for 300s
                                   node.kubernetes.io/unreachable:NoExecute op=Exists for 300s
      Events:
        Type     Reason                  Age   From               Message
        ----     ------                  ----  ----               -------
        Normal   Scheduled               3m2s  default-scheduler  Successfully assigned test/multihoming-ipv4-pod1 to ip-10-0-59-120.us-east-2.compute.internal
        Warning  FailedCreatePodSandBox  63s   kubelet            Failed to create pod sandbox: rpc error: code = Unknown desc = failed to create pod network sandbox k8s_multihoming-ipv4-pod1_test_0a9b931f-d20f-497f-831f-7b1302cc65d8_0(36431cbb1b6b487e1ecb3758922db87328f9e58a0ea74fa2453834b5f1afb621): error adding pod test_multihoming-ipv4-pod1 to CNI network "multus-cni-network": plugin type="multus-shim" name="multus-cni-network" failed (add): CmdAdd (shim): CNI request failed with status 400: 'ContainerID:"36431cbb1b6b487e1ecb3758922db87328f9e58a0ea74fa2453834b5f1afb621" Netns:"/var/run/netns/3bdfa373-0940-45b1-b3e5-8d4213ff96ee" IfName:"eth0" Args:"IgnoreUnknown=1;K8S_POD_NAMESPACE=test;K8S_POD_NAME=multihoming-ipv4-pod1;K8S_POD_INFRA_CONTAINER_ID=36431cbb1b6b487e1ecb3758922db87328f9e58a0ea74fa2453834b5f1afb621;K8S_POD_UID=0a9b931f-d20f-497f-831f-7b1302cc65d8" Path:"" ERRORED: error configuring pod [test/multihoming-ipv4-pod1] networking: [test/multihoming-ipv4-pod1/0a9b931f-d20f-497f-831f-7b1302cc65d8:layer2ipv4network]: error adding container to network "layer2ipv4network": CNI request failed with status 400: '[test/multihoming-ipv4-pod1 36431cbb1b6b487e1ecb3758922db87328f9e58a0ea74fa2453834b5f1afb621 network layer2ipv4network NAD test/layer2ipv4network] [test/multihoming-ipv4-pod1 36431cbb1b6b487e1ecb3758922db87328f9e58a0ea74fa2453834b5f1afb621 network layer2ipv4network NAD test/layer2ipv4network] failed to get pod annotation: timed out waiting for annotations: context deadline exceeded
      '
      ': StdinData: {"binDir":"/var/lib/cni/bin","clusterNetwork":"/host/run/multus/cni/net.d/10-ovn-kubernetes.conf","cniVersion":"0.3.1","daemonSocketDir":"/run/multus/socket","globalNamespaces":"default,openshift-multus,openshift-sriov-network-operator,openshift-cnv","logLevel":"verbose","logToStderr":true,"name":"multus-cni-network","namespaceIsolation":true,"type":"multus-shim"}
        Normal  AddedInterface  62s (x2 over 3m3s)  multus  Add eth0 [10.131.0.51/23] from ovn-kubernetes
       
      

      Actual results:
      The pod is stuck in ContainerCreating with the error "failed to get pod annotation: timed out waiting for annotations".
      Expected results:

      Pod in Running state

      Additional info:

      Test failures are reported in Sippy:

      https://qe-component-readiness.dptools.openshift.org/sippy-ng/component_readiness/env_capability?Architecture=amd64&Architecture=amd64&Network=ovn&Network=ovn&Platform=aws&Platform=aws&baseEndTime=2024-10-01%2023%3A59%3A59&baseRelease=4.17&baseStartTime=2024-09-01%2000%3A00%3A00&capability=Other&columnGroupBy=Architecture%2CNetwork%2CPlatform&component=Networking%20%2F%20openshift-sdn&confidence=95&dbGroupBy=Platform%2CArchitecture%2CNetwork%2CTopology%2CFeatureSet%2CUpgrade%2CSuite%2CInstaller&environment=amd64%20ovn%20aws&environment=amd64%20ovn%20aws&flakeAsFailure=false&ignoreDisruption=true&ignoreMissing=false&includeMultiReleaseAnalysis=false&includeVariant=Architecture%3Aamd64&includeVariant=FeatureSet%3Adefault&includeVariant=Installer%3Aipi&includeVariant=Installer%3Aupi&includeVariant=Network%3Aovn&includeVariant=Owner%3Aqe&includeVariant=Owner%3Aservice-delivery&includeVariant=Platform%3Aaws&includeVariant=Platform%3Aazure&includeVariant=Platform%3Agcp&includeVariant=Platform%3Arosa&includeVariant=Platform%3Avsphere&includeVariant=Topology%3Aha&minFail=3&passRateAllTests=0&passRateNewTests=0&pity=5&sampleEndTime=2025-02-01%2023%3A59%3A59&sampleRelease=4.18&sampleStartTime=2025-01-25%2000%3A00%3A00

              sseethar Surya Seetharaman
              weliang1@redhat.com Weibin Liang
              Weibin Liang Weibin Liang
              Votes:
              0 Vote for this issue
              Watchers:
              3 Start watching this issue

                Created:
                Updated:
                Resolved: