Uploaded image for project: 'Distributed Tracing'
  1. Distributed Tracing
  2. TRACING-5478

TempoStack refuses to accept logs sent by OTEL collector on grpc

XMLWordPrintable

    • Icon: Task Task
    • Resolution: Done
    • Icon: Undefined Undefined
    • None
    • rhosdt-3.6
    • Tempo
    • None
    • Tracing Sprint # 275, Tracing Sprint # 276
    • Low

      The exact error one receives is:

       

      2025-07-03T10:37:18.335Z    error    internal/queue_sender.go:57    Exporting failed. Dropping data.    {"resource": {}, "otelcol.component.id": "otlp", "otelcol.component.kind": "exporter", "otelcol.signal": "logs", "error": "not retryable error: Permanent error: rpc error: code = PermissionDenied desc = method never permitted", "dropped_items": 3} 

       

      While debugging with rhn-support-dgautam, using the new Red Hat build of OpenTelemetry collector, we tried to forward application-generated logs via OTLP through a collector to a DR TempoStack.

      Reproducer:

      TempoStack CR spec

      spec:
        observability:
          grafana:
            instanceSelector: {}
          metrics: {}
          tracing:
            jaeger_agent_endpoint: 'localhost:6831'
            otlp_http_endpoint: 'http://localhost:4320'
        timeout: 30s
        resources: {}
        search:
          defaultResultLimit: 20
          maxDuration: 0s
        tenants:
          authentication:
            - tenantId: 1610b0c3-c509-4592-a256-a1871353dbfa
              tenantName: user
            - tenantId: 1610b0c3-c509-4592-a256-a1871353dbfb
              tenantName: platform
          mode: openshift
        managementState: Managed
        limits:
          global:
            ingestion: {}
            query:
              maxSearchDuration: 0s
        serviceAccount: tempo-tempo-observe
        images: {}
        template:
          compactor:
            replicas: 1
          distributor:
            component:
              replicas: 1
            tls:
              enabled: false
          gateway:
            component:
              replicas: 1
            enabled: true
            ingress:
              route:
                termination: reencrypt
              type: route
            rbac:
              enabled: false
          ingester:
            replicas: 1
          querier:
            replicas: 1
          queryFrontend:
            component:
              replicas: 1
            jaegerQuery:
              enabled: true
              ingress:
                route: {}
              monitorTab:
                enabled: true
                prometheusEndpoint: 'https://thanos-querier.openshift-monitoring.svc.cluster.local:9091'
              servicesQueryDuration: 72h0m0s
              tempoQuery: {}
        replicationFactor: 1
        storage:
          secret:
            name: tempo-s3
            type: s3
          tls:
            enabled: true
        storageSize: 1Gi
        hashRing:
          memberlist: {}
        retention:
          global:
            traces: 48h0m0s

      OTEL collector CR spec

      spec:
        observability:
          metrics:
            enableMetrics: true
        deploymentUpdateStrategy: {}
        config:
          connectors:
            spanmetrics:
              dimensions:
                - name: k8s.namespace.name
              metrics_flush_interval: 5s
          exporters:
            debug:
              sampling_initial: 5
              sampling_thereafter: 200
              verbosity: detailed
            otlp:
              auth:
                authenticator: bearertokenauth
              endpoint: 'tempo-tempo-observe-gateway.tempo2.svc.cluster.local:8090'
              headers:
                X-Scope-OrgID: user
              sending_queue:
                queue_size: 150000
              tls:
                ca_file: /var/run/secrets/kubernetes.io/serviceaccount/service-ca.crt
            prometheus:
              add_metric_suffixes: false
              endpoint: '0.0.0.0:8889'
              resource_to_telemetry_conversion:
                enabled: true
          extensions:
            bearertokenauth:
              filename: /var/run/secrets/kubernetes.io/serviceaccount/token
          processors:
            k8sattributes: {}
          receivers:
            jaeger:
              protocols:
                thrift_compact:
                  endpoint: '0.0.0.0:6831'
            otlp:
              protocols:
                grpc:
                  endpoint: '0.0.0.0:4317'
                http:
                  endpoint: '0.0.0.0:4318'
          service:
            extensions:
              - bearertokenauth
            pipelines:
              logs:
                exporters:
                  - debug
                  - otlp
                receivers:
                  - otlp
              metrics:
                exporters:
                  - prometheus
                receivers:
                  - spanmetrics
              traces:
                exporters:
                  - otlp
                  - spanmetrics
                processors:
                  - k8sattributes
                receivers:
                  - otlp
                  - jaeger
            telemetry:
              metrics:
                address: '0.0.0.0:8888'
                readers:
                  - pull:
                      exporter:
                        prometheus:
                          host: 0.0.0.0
                          port: 8888
        mode: deployment
        resources: {}
        podDnsConfig: {}
        managementState: managed
        upgradeStrategy: automatic
        ingress:
          route: {}
        daemonSetUpdateStrategy: {}
        targetAllocator:
          allocationStrategy: consistent-hashing
          collectorNotReadyGracePeriod: 30s
          filterStrategy: relabel-config
          observability:
            metrics: {}
          prometheusCR:
            scrapeInterval: 30s
          resources: {}
        replicas: 1
        ipFamilyPolicy: SingleStack
        configVersions: 1 

      When an application creates logs and exports them through OTLP, the collector reports the error message shown at the top of this description.

      The logs themselves are accepted by the Collector: after removing the TempoStack (otlp) exporter from the logs pipeline, the debug exporter still outputs the logs and the error is no longer reported.

      We also initially tried assigning different ClusterRole permissions to the collector ServiceAccount, but a closer reading of the message shows that, even though it sounds like an RBAC issue, it is actually a gRPC method call issue. (cluster-admin was assigned to the SA to verify; the result was still Permission Denied.)

      To verify that Tempo has the necessary configuration, we looked at the `distributor` configuration generated by the Operator from the specs, which shows the expected OTLP receiver configuration:

      distributor:
        receivers:
          otlp:
            protocols:
              grpc:
                endpoint: 0.0.0.0:4317
                tls:
                  client_ca_file:  /var/run/ca/service-ca.crt
                  cert_file: /var/run/tls/server/tls.crt
                  key_file: /var/run/tls/server/tls.key
                  min_version: "1.3"
              http:
                endpoint: 0.0.0.0:4318
                tls:
                  client_ca_file:  /var/run/ca/service-ca.crt
                  cert_file: /var/run/tls/server/tls.crt
                  key_file: /var/run/tls/server/tls.key
                  min_version: "1.3" 

       

              agerstma@redhat.com Andreas Gerstmayr
              rhn-support-milang Michaela Lang
              Votes:
              0 Vote for this issue
              Watchers:
              4 Start watching this issue

                Created:
                Updated:
                Resolved: