-
Bug
-
Resolution: Done
-
Undefined
-
netobserv-1.5-candidate
-
None
-
False
-
None
-
False
-
-
-
NetObserv - Sprint 249
-
Critical
Description of problem:
The Cypress test for FlowRTT feature verification is failing on ppc64le because the panels in the Network Traffic UI are empty, with no flowlogs data. To verify the issue, I manually deployed the flowcollector with FlowRTT enabled and the RTT metrics added to "spec.processor.metrics.includeList" in the flowcollector CRD, following the steps from https://polarion.engineering.redhat.com/polarion/#/project/OSE/workitem?id=OCP-68246. When I checked the status of the eBPF pods, all of them were in the "CrashLoopBackOff" state and no data was showing up on the network-traffic UI page. #Cluster details: [root@rdr-noo-415-rc5-bastion-0 ~]# oc get clusterversion NAME VERSION AVAILABLE PROGRESSING SINCE STATUS version 4.15.0-rc.5 True False 39h Cluster version is 4.15.0-rc.5 [root@rdr-noo-415-rc5-bastion-0 ~]# oc version Client Version: 4.15.0-rc.5 Kustomize Version: v5.0.4-0.20230601165947-6ce0bf390ce3 Server Version: 4.15.0-rc.5 Kubernetes Version: v1.28.6+0fb4726 [root@rdr-noo-415-rc5-bastion-0 ~]# arch ppc64le [root@rdr-noo-415-rc5-bastion-0 ~]# #Operator status: [root@rdr-noo-415-rc5-bastion-0 ~]# oc get csv NAME DISPLAY VERSION REPLACES PHASE loki-operator.v5.8.3 Loki Operator 5.8.3 loki-operator.v5.8.2 Succeeded network-observability-operator.v1.5.0 Network Observability 1.5.0 network-observability-operator.v1.4.2 Succeeded #flowlogs and loki pods [root@rdr-noo-415-rc5-bastion-0 ~]# oc get po NAME READY STATUS RESTARTS AGE flowlogs-pipeline-2g756 1/1 Running 0 54m flowlogs-pipeline-7rcv6 1/1 Running 0 54m flowlogs-pipeline-qhwsf 1/1 Running 0 54m flowlogs-pipeline-wb7ql 1/1 Running 0 54m flowlogs-pipeline-zbb4m 1/1 Running 0 54m lokistack-compactor-0 1/1 Running 0 58m lokistack-distributor-77db85d4f-4bpmp 1/1 Running 0 58m lokistack-distributor-77db85d4f-hzvvs 1/1 Running 0 58m lokistack-gateway-5db4574d87-8qbqh 2/2 Running 0 58m lokistack-gateway-5db4574d87-m2x2f 2/2 Running 0 58m lokistack-index-gateway-0 1/1 Running 0 58m lokistack-index-gateway-1 1/1 Running 0 58m lokistack-ingester-0 1/1 
Running 0 58m lokistack-ingester-1 1/1 Running 0 57m lokistack-querier-7774f6f866-bn9ld 1/1 Running 0 58m lokistack-querier-7774f6f866-jwkpz 1/1 Running 0 58m lokistack-query-frontend-5bf5fd8796-4wrd7 1/1 Running 0 58m lokistack-query-frontend-5bf5fd8796-xghpg 1/1 Running 0 58m netobserv-plugin-7c678dfc6b-jxxgk 1/1 Running 0 54m [root@rdr-noo-415-rc5-bastion-0 ~]# #eBPF pods [root@rdr-noo-415-rc5-bastion-0 ~]# oc get po -n netobserv-privileged NAME READY STATUS RESTARTS AGE netobserv-ebpf-agent-4mjf9 0/1 CrashLoopBackOff 15 (3m9s ago) 55m netobserv-ebpf-agent-frmd2 0/1 CrashLoopBackOff 15 (3m6s ago) 55m netobserv-ebpf-agent-nrjrh 0/1 CrashLoopBackOff 15 (3m9s ago) 55m netobserv-ebpf-agent-pfbhv 0/1 CrashLoopBackOff 15 (3m48s ago) 55m netobserv-ebpf-agent-rrmxq 0/1 CrashLoopBackOff 15 (2m57s ago) 55m [root@rdr-noo-415-rc5-bastion-0 ~]# # eBPF pod logs [root@rdr-noo-415-rc5-bastion-0 ~]# oc logs netobserv-ebpf-agent-4mjf9 -n netobserv-privileged time="2024-02-14T10:44:09Z" level=info msg="starting NetObserv eBPF Agent" time="2024-02-14T10:44:09Z" level=info msg="initializing Flows agent" component=agent.Flows time="2024-02-14T10:44:09Z" level=fatal msg="can't instantiate NetObserv eBPF Agent" error="failed to attach the BPF program to tcpReceiveFentry: create raw tracepoint: not supported" [root@rdr-noo-415-rc5-bastion-0 ~]# Currently this issue reproduces only on ppc64le, on OCP versions 4.12 to 4.15.
flowcollector CRD:
[root@rdr-noo-415-rc5-bastion-0 ~]# oc get flowcollector cluster -n netobserv -o yaml apiVersion: flows.netobserv.io/v1beta2 kind: FlowCollector metadata: annotations: flows.netobserv.io/flowcollectorlegacy-namespace: netobserv flows.netobserv.io/flpparent-namespace: netobserv kubectl.kubernetes.io/last-applied-configuration: | {"apiVersion":"flows.netobserv.io/v1beta2","kind":"FlowCollector","metadata":{"annotations":{},"name":"cluster"},"spec":{"agent":{"ebpf":{"cacheActiveTimeout":"5s","cacheMaxFlows":100000,"excludeInterfaces":["lo"],"features":["FlowRTT"],"imagePullPolicy":"IfNotPresent","kafkaBatchSize":10485760,"logLevel":"info","resources":{"limits":{"memory":"800Mi"},"requests":{"cpu":"100m","memory":"50Mi"}},"sampling":1},"ipfix":{"cacheActiveTimeout":"20s","cacheMaxFlows":400,"clusterNetworkOperator":{"namespace":"openshift-network-operator"},"forceSampleAll":false,"ovnKubernetes":{"containerName":"ovnkube-node","daemonSetName":"ovnkube-node","namespace":"ovn-kubernetes"},"sampling":400},"type":"eBPF"},"consolePlugin":{"advanced":{"port":9001,"register":true},"autoscaler":{"maxReplicas":3,"metrics":[{"resource":{"name":"cpu","target":{"averageUtilization":50,"type":"Utilization"}},"type":"Resource"}],"minReplicas":1,"status":"Disabled"},"imagePullPolicy":"IfNotPresent","logLevel":"debug","portNaming":{"enable":true,"portNames":{"3100":"loki"}},"quickFilters":[{"default":true,"filter":{"dst_namespace!":"openshift-,netobserv","src_namespace!":"openshift-,netobserv"},"name":"Applications"},{"filter":{"dst_namespace":"openshift-,netobserv","src_namespace":"openshift-,netobserv"},"name":"Infrastructure"},{"default":true,"filter":{"dst_kind":"Pod","src_kind":"Pod"},"name":"Pods network"},{"filter":{"dst_kind":"Service"},"name":"Services 
network"}],"replicas":1,"resources":{"limits":{"memory":"100Mi"},"requests":{"cpu":"100m","memory":"50Mi"}}},"deploymentModel":"Direct","exporters":[],"kafka":{"address":"kafka-cluster-kafka-bootstrap.netobserv","tls":{"caCert":{"certFile":"ca.crt","name":"kafka-cluster-cluster-ca-cert","type":"secret"},"enable":false,"insecureSkipVerify":false,"userCert":{"certFile":"user.crt","certKey":"user.key","name":"flp-kafka","type":"secret"}},"topic":"network-flows"},"loki":{"advanced":{"writeMaxBackoff":"5s","writeMinBackoff":"1s"},"enable":true,"lokiStack":{"name":"lokistack"},"maxRetries":2,"mode":"LokiStack","writeBatchSize":10485760,"writeBatchWait":"1s"},"namespace":"netobserv","processor":{"advanced":{"dropUnusedFields":true,"enableKubeProbes":true,"healthPort":8080,"port":2055},"imagePullPolicy":"IfNotPresent","kafkaConsumerAutoscaler":{"maxReplicas":0,"status":"Disabled"},"kafkaConsumerBatchSize":10485760,"kafkaConsumerQueueCapacity":1000,"kafkaConsumerReplicas":3,"logLevel":"info","metrics":{"includeList":["node_ingress_bytes_total","workload_ingress_bytes_total","namespace_flows_total","node_rtt_seconds","namespace_rtt_seconds","workload_rtt_seconds"],"server":{"port":9102,"tls":{"type":"Disabled"}}},"resources":{"limits":{"memory":"800Mi"},"requests":{"cpu":"100m","memory":"100Mi"}}}}} creationTimestamp: "2024-02-14T09:52:10Z" finalizers: - flows.netobserv.io/finalizer generation: 4 name: cluster resourceVersion: "952785" uid: 5f6d3d13-d559-4ff2-86dd-4308adbc6e73 spec: agent: ebpf: cacheActiveTimeout: 5s cacheMaxFlows: 100000 excludeInterfaces: - lo features: - FlowRTT imagePullPolicy: IfNotPresent kafkaBatchSize: 10485760 logLevel: info resources: limits: memory: 800Mi requests: cpu: 100m memory: 50Mi sampling: 1 ipfix: cacheActiveTimeout: 20s cacheMaxFlows: 400 clusterNetworkOperator: namespace: openshift-network-operator ovnKubernetes: containerName: ovnkube-node daemonSetName: ovnkube-node namespace: ovn-kubernetes sampling: 400 type: eBPF consolePlugin: 
autoscaler: maxReplicas: 3 metrics: - resource: name: cpu target: averageUtilization: 50 type: Utilization type: Resource minReplicas: 1 status: Disabled enable: true imagePullPolicy: IfNotPresent logLevel: debug portNaming: enable: true portNames: "3100": loki quickFilters: - default: true filter: dst_namespace!: openshift-,netobserv src_namespace!: openshift-,netobserv name: Applications - filter: dst_namespace: openshift-,netobserv src_namespace: openshift-,netobserv name: Infrastructure - default: true filter: dst_kind: Pod src_kind: Pod name: Pods network - filter: dst_kind: Service name: Services network replicas: 1 resources: limits: memory: 100Mi requests: cpu: 100m memory: 50Mi deploymentModel: Direct exporters: [] kafka: address: kafka-cluster-kafka-bootstrap.netobserv sasl: clientIDReference: {} clientSecretReference: {} type: Disabled tls: caCert: certFile: ca.crt name: kafka-cluster-cluster-ca-cert type: secret userCert: certFile: user.crt certKey: user.key name: flp-kafka type: secret topic: network-flows loki: enable: true lokiStack: name: lokistack manual: authToken: Disabled ingesterUrl: http://loki:3100/ querierUrl: http://loki:3100/ statusTls: caCert: {} userCert: {} tenantID: netobserv tls: caCert: {} userCert: {} microservices: ingesterUrl: http://loki-distributor:3100/ querierUrl: http://loki-query-frontend:3100/ tenantID: netobserv tls: caCert: {} userCert: {} mode: LokiStack monolithic: tenantID: netobserv tls: caCert: {} userCert: {} url: http://loki:3100/ readTimeout: 30s writeBatchSize: 10485760 writeBatchWait: 1s writeTimeout: 10s namespace: netobserv processor: imagePullPolicy: IfNotPresent kafkaConsumerAutoscaler: maxReplicas: 0 status: Disabled kafkaConsumerBatchSize: 10485760 kafkaConsumerQueueCapacity: 1000 kafkaConsumerReplicas: 3 logLevel: info logTypes: Flows metrics: includeList: - node_ingress_bytes_total - workload_ingress_bytes_total - namespace_flows_total - node_rtt_seconds - namespace_rtt_seconds - workload_rtt_seconds 
server: port: 9102 tls: type: Disabled multiClusterDeployment: false resources: limits: memory: 800Mi requests: cpu: 100m memory: 100Mi status: conditions: - lastTransitionTime: "2024-02-14T09:52:20Z" message: 4 ready components, 0 with failure, 0 pending reason: Ready status: "True" type: Ready - lastTransitionTime: "2024-02-14T09:52:20Z" message: "" reason: Ready status: "True" type: FlowCollectorLegacyReady - lastTransitionTime: "2024-02-14T09:52:11Z" message: "" reason: Ready status: "True" type: MonitoringReady - lastTransitionTime: "2024-02-14T11:40:43Z" message: "" reason: Ready status: "True" type: FLPParentReady - lastTransitionTime: "2024-02-14T09:52:13Z" message: "" reason: Ready status: "True" type: FLPMonolithReady - lastTransitionTime: "2024-02-14T09:52:11Z" message: Transformer only used with Kafka reason: ComponentUnused status: Unknown type: FLPTransformOnlyReady - lastTransitionTime: "2024-02-14T09:52:11Z" message: Ingester only used with Kafka and without eBPF reason: ComponentUnused status: Unknown type: FLPIngestOnlyReady [root@rdr-noo-415-rc5-bastion-0 ~]#
Steps to Reproduce:
Install the netobserv operator v1.5.0 with the latest build and deploy the flowcollector with the following changes to enable the FlowRTT feature: 1. Enable FlowRTT in the eBPF agent features list, spec.agent.ebpf.features: - FlowRTT 2. Add the following items to the metrics list, spec.processor.metrics.includeList: - node_rtt_seconds - namespace_rtt_seconds - workload_rtt_seconds
Actual results:
No data is shown in the metric panels on the network-traffic page, and the eBPF agent pods are in the "CrashLoopBackOff" state.
Expected results:
Should be able to see the flowlogs data on network-traffic and flowRTT panels.