OpenShift Logging / LOG-3953

Ruler does not restart after updates to RulerConfig CR.

    • OBSDA-115 - Create alerting rules based on logs
    • VERIFIED
    • Before this update, the LokiStack Ruler did not restart when the administrator changed the RulerConfig custom resource. With this update, the Loki Operator restarts the Ruler pods when the RulerConfig changes, which resolves the issue.
    • Bug Fix
    • Log Storage - Sprint 235, Log Storage - Sprint 236, Log Storage - Sprint 237, Log Storage - Sprint 238

      The Loki Operator (LO) does not reconcile the Loki Ruler after updates are made to the RulerConfig CR. As a result, if changes are made to alert relabeling, for example, Alertmanager (AM) still shows the pre-change data. The Ruler must be restarted manually for the changes to take effect.
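
      A minimal sketch of such a manual restart, assuming a LokiStack named logging-loki in openshift-logging whose ruler runs as the logging-loki-ruler StatefulSet with the usual app.kubernetes.io/component=ruler label (all names here are assumptions, not taken from this cluster):

      # Restart the ruler so it re-reads the regenerated runtime-config
      $ oc -n openshift-logging rollout restart statefulset/logging-loki-ruler

      # Or delete the ruler pods and let the StatefulSet recreate them
      $ oc -n openshift-logging delete pod -l app.kubernetes.io/component=ruler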

      Steps to reproduce:
      1) Deploy CLO and EO
      2) Forward logs to Loki
      3) Create Alerting rules for a tenant (an example AlertingRule is sketched after this list)
      4) Create RulerConfig CR with alert relabeling configuration.
      5) Query AM for alerts
      6) Update some labels in RulerConfig
      7) Query AM after some time
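
      For step 3, a minimal sketch of the kind of AlertingRule used here, reconstructed from the alert visible in the AM output below; the metadata name and the openshift.io/log-alerting label are placeholders that must match the LokiStack rules selectors, and the field names follow the loki.grafana.com/v1 AlertingRule API:

      apiVersion: loki.grafana.com/v1
      kind: AlertingRule
      metadata:
        name: my-app-1-log-volume                # placeholder name
        namespace: my-app-1
        labels:
          openshift.io/log-alerting: "true"      # placeholder; must match the rules selectors on the LokiStack
      spec:
        tenantID: application
        groups:
          - name: my-app-1-alerts
            rules:
              - alert: MyApplication1LogVolumeIsHigh
                expr: |
                  count_over_time({kubernetes_namespace_name="my-app-1"}[2m]) > 10
                labels:
                  severity: info
                  environment: prod
                  region: us-east-1
                  project: my-app-1
                annotations:
                  summary: 'project "my-app-1" log volume is high.'
                  description: My application 1 has high amount of logs.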

      RulerConfig CR: http://pastebin.test.redhat.com/1097239
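
      Since that pastebin is internal, here is a hedged reconstruction of the RulerConfig based on the rendered runtime-config shown below. It is a sketch only: field names follow the loki.grafana.com/v1 RulerConfig API and should be verified against the installed CRD, and the per-tenant spec.overrides form can be used instead of the global spec.alertmanager shown here.

      apiVersion: loki.grafana.com/v1
      kind: RulerConfig
      metadata:
        name: rulerconfig                        # placeholder; lives in the LokiStack namespace
        namespace: openshift-logging
      spec:
        alertmanager:
          enableV2: true
          endpoints:
            - https://_web._tcp.alertmanager-operated.openshift-user-workload-monitoring.svc
          discovery:
            enableSRV: true
            refreshInterval: 1m
          client:
            tls:
              caPath: /var/run/ca/alertmanager/service-ca.crt
              serverName: alertmanager-user-workload.openshift-user-workload-monitoring.svc.cluster.local
            headerAuth:
              type: Bearer
              credentialsFile: /var/run/secrets/kubernetes.io/serviceaccount/token
          relabelConfigs:
            - sourceLabels: ["severity"]
              regex: info
              action: replace
              separator: ";"
              replacement: critical
              targetLabel: severity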

      runtime-config before updating relabeling rules

      ---
      overrides:
        application:
          ruler_alertmanager_config:
            alertmanager_url: https://_web._tcp.alertmanager-operated.openshift-user-workload-monitoring.svc
            enable_alertmanager_v2: true
            enable_alertmanager_discovery: true
            alertmanager_refresh_interval: 1m
            alertmanager_client:
              tls_ca_path: /var/run/ca/alertmanager/service-ca.crt
              tls_server_name: alertmanager-user-workload.openshift-user-workload-monitoring.svc.cluster.local
              type: Bearer
              credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            alert_relabel_configs:
            -
              source_labels: ["severity"]
              regex: info
              action: replace
              separator: ;
              replacement: critical
              target_label: severity

      runtime-config after updating relabeling rules.

      ---
      overrides:
        application:
          ruler_alertmanager_config:
            alertmanager_url: https://_web._tcp.alertmanager-operated.openshift-user-workload-monitoring.svc
            enable_alertmanager_v2: true
            enable_alertmanager_discovery: true
            alertmanager_refresh_interval: 1m
            alertmanager_client:
              tls_ca_path: /var/run/ca/alertmanager/service-ca.crt
              tls_server_name: alertmanager-user-workload.openshift-user-workload-monitoring.svc.cluster.local
              type: Bearer
              credentials_file: /var/run/secrets/kubernetes.io/serviceaccount/token
            alert_relabel_configs:
            -
              source_labels: ["severity","project"]
              regex: info;my-app-1
              action: replace
              separator: ;
              replacement: uwmlogs-relabeled
              target_label: project
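
      To compare the rendered runtime-config above with what the running ruler has actually loaded, both can be read back from the cluster. A sketch under assumptions: the LokiStack is named logging-loki in openshift-logging, the operator renders the file into a ConfigMap named logging-loki-config, and the ruler image ships curl for querying Loki's /runtime_config endpoint on port 3100.

      # Rendered runtime-config as generated by the operator (ConfigMap name assumed)
      $ oc -n openshift-logging get configmap logging-loki-config \
          -o jsonpath='{.data.runtime-config\.yaml}'

      # Runtime config currently loaded by the ruler process (pod name and curl availability assumed)
      $ oc -n openshift-logging exec logging-loki-ruler-0 -- curl -s http://localhost:3100/runtime_config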

      AM Query still shows pre-update data

      $ oc -n openshift-monitoring exec -c prometheus prometheus-k8s-0 -- curl -k -H "Authorization: Bearer $(oc whoami -t)" 'https://alertmanager-user-workload.openshift-user-workload-monitoring.svc:9095/api/v2/alerts' | jq
        {
          "annotations": {
            "description": "My application 1 has high amount of logs.",
            "summary": "project \"my-app-1\" log volume is high."
          },
          "endsAt": "2023-04-13T22:04:20.791Z",
          "fingerprint": "32c2eee0f392b57f",
          "receivers": [
            {
              "name": "Default"
            }
          ],
          "startsAt": "2023-04-13T21:48:20.791Z",
          "status": {
            "inhibitedBy": [],
            "silencedBy": [],
            "state": "active"
          },
          "updatedAt": "2023-04-13T22:00:20.812Z",
          "generatorURL": "/graph?g0.expr=%28count_over_time%28%7Bkubernetes_namespace_name%3D%22my-app-1%22%7D%5B2m%5D%29+%3E+10%29&g0.tab=1",
          "labels": {
            "alertname": "MyApplication1LogVolumeIsHigh",
            "environment": "prod",
            "kubernetes_container_name": "centos-logtest",
            "kubernetes_host": "kbharti-0413-gcp3-gdc8g-worker-a-5ndxk.c.openshift-qe.internal",
            "kubernetes_namespace_name": "my-app-1",
            "kubernetes_pod_name": "centos-logtest-pwndm",
            "log_type": "application",
            "project": "my-app-1",
            "region": "us-east-1",
            "severity": "critical",
            "tenantId": "application"
          }
        }
      ]
      

      After manual restart of ruler

        {
          "annotations": {
            "description": "My application 1 has high amount of logs.",
            "summary": "project \"my-app-1\" log volume is high."
          },
          "endsAt": "2023-04-13T22:15:20.791Z",
          "fingerprint": "a0dc6f48180befe9",
          "receivers": [
            {
              "name": "Default"
            }
          ],
          "startsAt": "2023-04-13T22:09:20.791Z",
          "status": {
            "inhibitedBy": [],
            "silencedBy": [],
            "state": "active"
          },
          "updatedAt": "2023-04-13T22:11:20.800Z",
          "generatorURL": "/graph?g0.expr=%28count_over_time%28%7Bkubernetes_namespace_name%3D%22my-app-1%22%7D%5B2m%5D%29+%3E+10%29&g0.tab=1",
          "labels": {
            "alertname": "MyApplication1LogVolumeIsHigh",
            "environment": "prod",
            "kubernetes_container_name": "centos-logtest",
            "kubernetes_host": "kbharti-0413-gcp3-gdc8g-worker-a-5ndxk.c.openshift-qe.internal",
            "kubernetes_namespace_name": "my-app-1",
            "kubernetes_pod_name": "centos-logtest-pwndm",
            "log_type": "application",
            "project": "uwmlogs-relabeled",
            "region": "us-east-1",
            "severity": "info",
            "tenantId": "application"
          }
        }
      ] 
      

      Version: OCP 4.13, Logging 5.7

      How reproducible: always

      Expected Result: Updated relabel rules should be applied to the alerts sent to AM.

      Actual Result: AM still shows the pre-update labels.

              btaani@redhat.com Bayan Taani (Inactive)
              rhn-support-kbharti Kabir Bharti