After installing Red Hat OpenShift distributed tracing platform version 1.29.1, OpenShift Container Platform 4 Monitoring fires the TargetDown alert because the Jaeger Operator metrics cannot be scraped.
{"data":[{"alerts":[{"activeAt":"2022-02-09T15:19:05Z","annotations":{"description":"100% of the jaeger-operator-metrics/jaeger-operator-metrics targets in openshift-operators namespace have been unreachable for more than 15 minutes. This may be a symptom of network connectivity issues, down nodes, or failures within these components. Assess the health of the infrastructure and nodes running these targets and then contact support.","summary":"Some targets were not reachable from the monitoring server for an extended period of time."},"labels":{"alertname":"TargetDown","job":"jaeger-operator-metrics","namespace":"openshift-operators","service":"jaeger-operator-metrics","severity":"warning"},"state":"firing","value":"1e+02"}],"annotations":{"description":"{{ printf \"%.4g\" $value }}% of the {{ $labels.job }}/{{ $labels.service }} targets in {{ $labels.namespace }} namespace have been unreachable for more than 15 minutes. This may be a symptom of network connectivity issues, down nodes, or failures within these components. Assess the health of the infrastructure and nodes running these targets and then contact support.","summary":"Some targets were not reachable from the monitoring server for an extended period of time."},"duration":900,"evaluationTime":0.003437612,"health":"ok","labels":{"severity":"warning"},"lastEvaluation":"2022-02-11T11:07:05.422800701Z","name":"TargetDown","query":"100 * (count by(job, namespace, service) (up == 0 unless on(node) max by(node) (kube_node_spec_unschedulable == 1)) / count by(job, namespace, service) (up unless on(node) max by(node) (kube_node_spec_unschedulable == 1))) \u003e 10","state":"firing","type":"alerting"}]}
The problem is that the kube-rbac-proxy container is started with "--upstream=http://127.0.0.1:8080/", so it forwards requests to port 8080, while the jaeger-operator container serves its metrics on port 8383 (the "http-metrics" container port):
$ oc get deployment jaeger-operator -o json | jq '.spec.template.spec' { "containers": [ { "args": [ "start", "--leader-elect", "--jaeger-agent-image=registry.redhat.io/rhosdt/jaeger-agent-rhel8@sha256:7101e64a92c126c2fbc9ae6dd44828505b062785fd1a1563c1c32f61ab36ca59", "--jaeger-query-image=registry.redhat.io/rhosdt/jaeger-query-rhel8@sha256:7322390e980b88cf5dcd95da070e0010c700fdf89c51e35dd93af18bec201dcc", "--jaeger-collector-image=registry.redhat.io/rhosdt/jaeger-collector-rhel8@sha256:6cc9103c02899375d5dc2bf0551ac7d560c38cf2329122f86d878f57f3d35629", "--jaeger-ingester-image=registry.redhat.io/rhosdt/jaeger-ingester-rhel8@sha256:e7e9d4fafc394d475ea97f52b35b306cace6be9a41e0b98d2ad093e32dcfa1ee", "--jaeger-all-in-one-image=registry.redhat.io/rhosdt/jaeger-all-in-one-rhel8@sha256:3701077d113120f3280df17b6d3e71239e3fa6a3d1e077a910328f89d57b869f", "--jaeger-es-index-cleaner-image=registry.redhat.io/rhosdt/jaeger-es-index-cleaner-rhel8@sha256:7e165c8cfebb53d6feee9ff0a3b59eb15bf44449451edd4a5ba50a64d60d49eb", "--jaeger-es-rollover-image=registry.redhat.io/rhosdt/jaeger-es-rollover-rhel8@sha256:9b67dc95983853f1597e225eb54f6caceb1c116c7f4d649dbc876b3de84e6ff9", "--openshift-oauth-proxy-image=registry.redhat.io/openshift4/ose-oauth-proxy:latest", "--openshift-oauth-proxy-imagestream-ns=openshift", "--openshift-oauth-proxy-imagestream-name=oauth-proxy", "--documentation-url=https://access.redhat.com/documentation/en-us/openshift_container_platform/4.9/html/distributed_tracing/index" ], "env": [ { "name": "WATCH_NAMESPACE", "valueFrom": { "fieldRef": { "apiVersion": "v1", "fieldPath": "metadata.annotations['olm.targetNamespaces']" } } }, { "name": "POD_NAME", "valueFrom": { "fieldRef": { "apiVersion": "v1", "fieldPath": "metadata.name" } } }, { "name": "POD_NAMESPACE", "valueFrom": { "fieldRef": { "apiVersion": "v1", "fieldPath": "metadata.namespace" } } }, { "name": "OPERATOR_NAME", "value": "jaeger-operator" }, { "name": "OPERATOR_CONDITION_NAME", "value": 
"jaeger-operator.v1.29.1" } ], "image": "registry.redhat.io/rhosdt/jaeger-rhel8-operator@sha256:07b7811edb93d5181c062101269e216c96598ee2db97f158fcffee59a89e7e81", "imagePullPolicy": "Always", "name": "jaeger-operator", "ports": [ { "containerPort": 8383, "name": "http-metrics", "protocol": "TCP" }, { "containerPort": 8686, "name": "cr-metrics", "protocol": "TCP" } ], "resources": {}, "terminationMessagePath": "/dev/termination-log", "terminationMessagePolicy": "File" }, { "args": [ "--secure-listen-address=0.0.0.0:8443", "--upstream=http://127.0.0.1:8080/", "--logtostderr=true", "--v=10" ], "env": [ { "name": "OPERATOR_CONDITION_NAME", "value": "jaeger-operator.v1.29.1" } ], "image": "registry.redhat.io/openshift4/ose-kube-rbac-proxy@sha256:7a7145e20786d051ce14f0b566481ffe5dc71324acc1dffae31dca84dcbf0e24", "imagePullPolicy": "IfNotPresent", "name": "kube-rbac-proxy", "ports": [ { "containerPort": 8443, "name": "https", "protocol": "TCP" } ], "resources": {}, "terminationMessagePath": "/dev/termination-log", "terminationMessagePolicy": "File" } ], "dnsPolicy": "ClusterFirst", "restartPolicy": "Always", "schedulerName": "default-scheduler", "securityContext": {}, "serviceAccount": "jaeger-operator", "serviceAccountName": "jaeger-operator", "terminationGracePeriodSeconds": 30 }
Changing the upstream port from 8080 to 8383 in the ClusterServiceVersion (CSV) jaeger-operator.v1.29.1 solves the problem. However, this manual change is not persistent, so the issue needs to be fixed in the operator itself.