Uploaded image for project: 'Distributed Tracing'
  1. Distributed Tracing
  2. TRACING-5708

OpenTelemetryCollector mutating webhook not working as expected if used with another mutating webhook technology

XMLWordPrintable

    • Tracing Sprint # 279

      As seen with KNative (OpenShift Serverless), when using the KNative mutating hooks to inject KNative serving functions, any opentelemetry collector mutation is dropped and therefore fails on several occasions.

      Reproducer

      • installing OpenShift Serverless operator
      oc create namespace openshift-serverless
      
      cat <<'EOF' | oc -n openshift-serverless create -f- 
      apiVersion: operators.coreos.com/v1alpha1
      kind: Subscription
      metadata:
        labels:
          operators.coreos.com/serverless-operator.openshift-serverless: ''
        name: serverless-operator
        namespace: openshift-serverless
      spec:
        channel: stable
        installPlanApproval: Automatic
        name: serverless-operator
        source: redhat-operators
        sourceNamespace: openshift-marketplace
        startingCSV: serverless-operator.v1.36.1
      EOF
      •  Instantiating KNative-serving

       

      cat <<'EOF' | oc -n knative-serving create -f-
      apiVersion: operator.knative.dev/v1beta1
      kind: KnativeServing
      metadata:
        name: knative-serving
        namespace: knative-serving
      spec:
        high-availability:
          replicas: 1
        ingress:
          contour:
            enabled: false
          istio:
            enabled: false
          kourier:
            enabled: true
        registry: {}
      EOF

       

      • Adding the necessary Trace ClusterRoleBinding for the otel-collector ServiceAccount

       

      cat <<'EOF' | oc create -f-
      apiVersion: rbac.authorization.k8s.io/v1
      kind: ClusterRoleBinding
      metadata:
        name: traces-writer-user-knative
      roleRef:
        apiGroup: rbac.authorization.k8s.io
        kind: ClusterRole
        name: traces-writer-user
      subjects:
        - kind: ServiceAccount
          name: otel-collector
          namespace: us
      EOF

       

      • Deploying an OpenTelemetryCollector in sidecar to be utilized with the KNative function

       

      oc create namespace us 
      cat <<'EOF' | oc -n us create -f-
      apiVersion: opentelemetry.io/v1beta1
      kind: OpenTelemetryCollector
      metadata:
        name: otel
        namespace: us
      spec:
        config:
          exporters:
            debug:
              sampling_initial: 5
              sampling_thereafter: 200
              verbosity: detailed
            otlp/tempo2:
              auth:
                authenticator: bearertokenauth
              endpoint: tempo-tempo-observe-gateway.tempo2.svc.cluster.local:8090
              headers:
                X-Scope-OrgID: user
              tls:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
            otlphttp/loki:
              endpoint: https://loki.apps.example.com/otlp
              tls:
                insecure_skip_verify: false
                ca_file: /var/conf/certs/ca-bundle.crt/configmap-example-ca-trustbundle/ca-bundle.crt
          extensions:
            bearertokenauth:
              filename: /var/run/secrets/kubernetes.io/serviceaccount/token
          processors:
            batch: {}
            memory_limiter:
              check_interval: 1s
              limit_mib: 1000
              spike_limit_percentage: 10
          receivers:
            otlp:
              protocols:
                grpc:
                  endpoint: 0.0.0.0:4317
          service:
            extensions:
              - bearertokenauth
            pipelines:
              logs:
                exporters:
                  - debug
                  - otlphttp/loki
                receivers:
                  - otlp
              traces:
                exporters:
                  - debug
                  - otlp/tempo2
                processors:
                  - memory_limiter
                  - batch
                receivers:
                  - otlp
            telemetry:
              metrics:
                readers:
                  - pull:
                      exporter:
                        prometheus:
                          host: 0.0.0.0
                          port: 8888
        configVersions: 3
        daemonSetUpdateStrategy: {}
        deploymentUpdateStrategy: {}
        ingress:
          route: {}
        ipFamilyPolicy: SingleStack
        managementState: managed
        mode: sidecar
        networkPolicy:
          enabled: true
        observability:
          metrics: {}
        podDnsConfig: {}
        ports:
          - appProtocol: grpc
            name: otlp
            port: 4317
            protocol: TCP
            targetPort: 4317
          - appProtocol: http
            name: otlp-http
            port: 4318
            protocol: TCP
            targetPort: 4318
        replicas: 1
        resources:
          limits:
            cpu: 200m
            memory: 128Mi
          requests:
            cpu: 10m
            memory: 32Mi
        targetAllocator:
          allocationStrategy: consistent-hashing
          collectorNotReadyGracePeriod: 30s
          collectorTargetReloadInterval: 30s
          filterStrategy: relabel-config
          observability:
            metrics: {}
          prometheusCR:
            scrapeInterval: 30s
          resources: {}
        upgradeStrategy: automatic
        configmaps:
          - mountpath: /certs/ca-bundle.crt
            name: example-ca-trustbundle
      EOF
      • creating the KNative function with following command

       

      kn -n us service apply s0 --scale-min=1 \
       --image=quay.example.com/infrastructure/mockbin:aio-1.0.1 \
       -a 'queue.sidecar.serving.knative.dev/cpu-resource-request=10m' \
       -a 'queue.sidecar.serving.knative.dev/cpu-resource-limit=500m' \
       -a 'queue.sidecar.serving.knative.dev/memory-resource-request=32Mi' \
       -a 'queue.sidecar.serving.knative.dev/memory-resource-limit=512Mi' \
       --request='cpu=10m,memory=32Mi' \
       --limit='cpu=500m,memory=128Mi' \
       -a 'sidecar.opentelemetry.io/inject=true' 

      will fail because KNative serving is not able to process the pod startup: the OTC init container fails with a missing volume, namely the `configMap` volume needed for CA-trust-established exporter sessions.

       

      Now removing the `configMap` and the `ca_file` and using `insecure_skip_verify: true` instead still leaves the collector with `permission denied`, dropping any trace, because the `serviceAccount` is set by the KNative mutating webhook and therefore the ClusterRoleBinding needs to be adjusted to use the `default` serviceAccount

      • replace the ClusterRoleBinding with the default serviceAccount 
      cat <<'EOF' | oc replace -f-
      apiVersion: rbac.authorization.k8s.io/v1
      kind: ClusterRoleBinding
      metadata:
        name: traces-writer-user-knative
      roleRef:
        apiGroup: rbac.authorization.k8s.io
        kind: ClusterRole
        name: traces-writer-user
      subjects:
        - kind: ServiceAccount
          name: default
          namespace: us
      EOF 

       

              ploffay@redhat.com Pavol Loffay
              rhn-support-milang Michaela Lang
              Votes:
              0 Vote for this issue
              Watchers:
              2 Start watching this issue

                Created:
                Updated:
                Resolved: