ts=2022-09-26T17:24:55.137Z caller=main.go:552 level=info msg="Starting Prometheus Server" mode=server version="(version=2.36.2, branch=rhaos-4.11-rhel-8, revision=0d81ba04ce410df37ca2c0b1ec619e1bc02e19ef)"
ts=2022-09-26T17:24:55.137Z caller=main.go:557 level=info build_context="(go=go1.18.4, user=root@371541f17026, date=20220916-14:15:37)"
ts=2022-09-26T17:24:55.137Z caller=main.go:558 level=info host_details="(Linux 4.18.0-372.26.1.el8_6.x86_64 #1 SMP Sat Aug 27 02:44:20 EDT 2022 x86_64 prometheus-k8s-1 (none))"
ts=2022-09-26T17:24:55.137Z caller=main.go:559 level=info fd_limits="(soft=1048576, hard=1048576)"
ts=2022-09-26T17:24:55.137Z caller=main.go:560 level=info vm_limits="(soft=unlimited, hard=unlimited)"
ts=2022-09-26T17:24:55.138Z caller=web.go:553 level=info component=web msg="Start listening for connections" address=127.0.0.1:9090
ts=2022-09-26T17:24:55.138Z caller=main.go:989 level=info msg="Starting TSDB ..."
ts=2022-09-26T17:24:55.139Z caller=tls_config.go:231 level=info component=web msg="TLS is disabled." http2=false
ts=2022-09-26T17:24:55.139Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663015386574 maxt=1663027200000 ulid=01GCTKHBKEFXSYY1JBSQHF4T7H
ts=2022-09-26T17:24:55.140Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663027200101 maxt=1663092000000 ulid=01GCWHDDPFEQ2M34Q1YHMPNM7S
ts=2022-09-26T17:24:55.140Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663092000000 maxt=1663156800000 ulid=01GCY89FDFA33PZXCQDWYE0TWM
ts=2022-09-26T17:24:55.141Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663156800000 maxt=1663214400000 ulid=01GD4FCJ3R2QXB1MNBT4PGRXAY
ts=2022-09-26T17:24:55.141Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663362582236 maxt=1663416000000 ulid=01GD66B1C4F2YAW0XHACSPAB34
ts=2022-09-26T17:24:55.142Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663416000000 maxt=1663480800000 ulid=01GD7X7KV9V6FHX9KFKPNN7G0T
ts=2022-09-26T17:24:55.143Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663480800078 maxt=1663545600000 ulid=01GDA1Y4RK365QMCA88KAXSFZQ
ts=2022-09-26T17:24:55.143Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663545600135 maxt=1663610400000 ulid=01GDBZRKEWS3S6B3K6Q29NMWD7
ts=2022-09-26T17:24:55.144Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663610400083 maxt=1663675200000 ulid=01GDDXHPH9R57DZSTXZM6PVDB6
ts=2022-09-26T17:24:55.144Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663675200144 maxt=1663740000000 ulid=01GDFV9WY6MQCH1GW360E93BRK
ts=2022-09-26T17:24:55.145Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663740000170 maxt=1663804800000 ulid=01GDHS5DXSZCMM6Q1CEK6N4QFR
ts=2022-09-26T17:24:55.145Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663804800000 maxt=1663869600000 ulid=01GDKG29JPG3AC9PW69RJ4KPQQ
ts=2022-09-26T17:24:55.146Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663869600059 maxt=1663934400000 ulid=01GDNMQEWVXT069HRJ7H44JAAS
ts=2022-09-26T17:24:55.146Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663934400073 maxt=1663999200000 ulid=01GDQJG8RKJX13V2NK0Q3DB320
ts=2022-09-26T17:24:55.147Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1663999200000 maxt=1664064000000 ulid=01GDS9DAD3KS8RXXK7S24503NA
ts=2022-09-26T17:24:55.147Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664064000006 maxt=1664128800000 ulid=01GDVE39EF4TR4HBQHJCJWNW5C
ts=2022-09-26T17:24:55.148Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664128800058 maxt=1664150400000 ulid=01GDW2NSEV6CRHCVKRTBTYVC39
ts=2022-09-26T17:24:55.148Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664172000058 maxt=1664179200000 ulid=01GDWGCKXBME10WYE78A1AV6X4
ts=2022-09-26T17:24:55.148Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664179200058 maxt=1664186400000 ulid=01GDWQ8B52K8VH5GRC5FRHVCSB
ts=2022-09-26T17:24:55.149Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664150400058 maxt=1664172000000 ulid=01GDWQ955SG3ND1WYBR9F835DZ
ts=2022-09-26T17:24:55.150Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664186400058 maxt=1664193600000 ulid=01GDWY426QHVEAR34G94KW5TB9
ts=2022-09-26T17:24:55.150Z caller=repair.go:56 level=info component=tsdb msg="Found healthy block" mint=1664193600000 maxt=1664200800000 ulid=01GDX4ZSHJCX9Z3AAC9BE7K5FK
ts=2022-09-26T17:25:04.358Z caller=head.go:493 level=info component=tsdb msg="Replaying on-disk memory mappable chunks if any"
ts=2022-09-26T17:25:05.866Z caller=head.go:536 level=info component=tsdb msg="On-disk memory mappable chunks replay completed" duration=1.508205038s
ts=2022-09-26T17:25:05.866Z caller=head.go:542 level=info component=tsdb msg="Replaying WAL, this may take a while"
ts=2022-09-26T17:27:49.119Z caller=head.go:578 level=info component=tsdb msg="WAL checkpoint loaded"
ts=2022-09-26T17:27:54.906Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=965 maxSegment=985
ts=2022-09-26T17:28:00.356Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=966 maxSegment=985
ts=2022-09-26T17:28:07.545Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=967 maxSegment=985
ts=2022-09-26T17:28:17.932Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=968 maxSegment=985
ts=2022-09-26T17:28:23.249Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=969 maxSegment=985
ts=2022-09-26T17:28:25.348Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=970 maxSegment=985
ts=2022-09-26T17:28:30.600Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=971 maxSegment=985
ts=2022-09-26T17:28:39.304Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=972 maxSegment=985
ts=2022-09-26T17:28:52.469Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=973 maxSegment=985
ts=2022-09-26T17:29:01.256Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=974 maxSegment=985
ts=2022-09-26T17:29:08.350Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=975 maxSegment=985
ts=2022-09-26T17:29:19.696Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=976 maxSegment=985
ts=2022-09-26T17:29:29.404Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=977 maxSegment=985
ts=2022-09-26T17:29:36.967Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=978 maxSegment=985
ts=2022-09-26T17:29:37.734Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=979 maxSegment=985
ts=2022-09-26T17:29:54.770Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=980 maxSegment=985
ts=2022-09-26T17:30:17.359Z caller=head_wal.go:337 level=warn component=tsdb msg="Unknown series references" samples=33 exemplars=0
ts=2022-09-26T17:30:17.359Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=981 maxSegment=985
ts=2022-09-26T17:30:29.668Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=982 maxSegment=985
ts=2022-09-26T17:30:56.003Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=983 maxSegment=985
ts=2022-09-26T17:31:17.754Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=984 maxSegment=985
ts=2022-09-26T17:31:17.754Z caller=head.go:613 level=info component=tsdb msg="WAL segment loaded" segment=985 maxSegment=985
ts=2022-09-26T17:31:17.754Z caller=head.go:619 level=info component=tsdb msg="WAL replay completed" checkpoint_replay_duration=2m43.253036348s wal_replay_duration=3m28.635477051s total_replay_duration=6m13.39681296s
ts=2022-09-26T17:31:29.033Z caller=main.go:1010 level=info fs_type=EXT4_SUPER_MAGIC
ts=2022-09-26T17:31:29.033Z caller=main.go:1013 level=info msg="TSDB started"
ts=2022-09-26T17:31:29.033Z caller=main.go:1194 level=info msg="Loading configuration file" filename=/etc/prometheus/config_out/prometheus.env.yaml
ts=2022-09-26T17:31:29.054Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.055Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.055Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.057Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.057Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.057Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.058Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.058Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.058Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.059Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.059Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.059Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.059Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.059Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.060Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.060Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.060Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.060Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.060Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.061Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.061Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.061Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.061Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.061Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.062Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.062Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.062Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.062Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.063Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.066Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.067Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.067Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.067Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.068Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.068Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.068Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.068Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.069Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.069Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.069Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.070Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.070Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.071Z caller=kubernetes.go:325 level=info component="discovery manager scrape" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.071Z caller=kubernetes.go:325 level=info component="discovery manager notify" discovery=kubernetes msg="Using pod service account via in-cluster config"
ts=2022-09-26T17:31:29.159Z caller=main.go:1231 level=info msg="Completed loading of configuration file" filename=/etc/prometheus/config_out/prometheus.env.yaml totalDuration=125.216262ms db_storage=1.32µs remote_storage=1.4µs web_handler=620ns query_engine=1.07µs scrape=191.694µs scrape_sd=18.228351ms notify=136.252µs notify_sd=332.266µs rules=87.260692ms tracing=6.41µs
ts=2022-09-26T17:31:29.159Z caller=main.go:974 level=info msg="Server is ready to receive web requests."
ts=2022-09-26T17:31:29.159Z caller=manager.go:937 level=info component="rule manager" msg="Starting rule manager..."
ts=2022-09-26T17:32:45.977Z caller=main.go:824 level=warn msg="Received SIGTERM, exiting gracefully..."
ts=2022-09-26T17:32:46.216Z caller=main.go:848 level=info msg="Stopping scrape discovery manager..."
ts=2022-09-26T17:32:46.217Z caller=main.go:862 level=info msg="Stopping notify discovery manager..."
ts=2022-09-26T17:32:46.217Z caller=manager.go:951 level=info component="rule manager" msg="Stopping rule manager..."
ts=2022-09-26T17:32:48.454Z caller=main.go:844 level=info msg="Scrape discovery manager stopped"
ts=2022-09-26T17:32:48.423Z caller=main.go:858 level=info msg="Notify discovery manager stopped"
ts=2022-09-26T17:34:49.796Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-kube-apiserver-slos-extended-94db1e1b-f9e4-4b01-975a-3e6fa4190a73.yaml group=kube-apiserver.rules name=apiserver_request:burnrate2h index=0 msg="Evaluating rule failed" rule="record: apiserver_request:burnrate2h\nexpr: label_replace(sum(rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))\n / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))),\n \"type\", \"error\", \"_none_\", \"\") or label_replace((sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",scope=\"resource\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[2h]))\n - (sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"1\",scope=\"resource\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[2h]))\n or vector(0))) / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[2h]))),\n \"type\", \"slow-resource\", \"_none_\", \"\") or label_replace((sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",scope=\"namespace\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[2h]))\n - sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"namespace\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[2h])))\n / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[2h]))),\n \"type\", \"slow-namespace\", \"_none_\", \"\") or label_replace((sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",scope=\"cluster\",verb=~\"LIST|GET\"}[2h]))\n - sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"30\",scope=\"cluster\",verb=~\"LIST|GET\"}[2h])))\n / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[2h]))),\n \"type\", \"slow-cluster\", \"_none_\", \"\")\nlabels:\n verb: read\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.839Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-config-operator-machine-config-daemon-5cf57d1b-1aa9-4d6b-a48c-498035e7c291.yaml group=high-overall-control-plane-memory name=HighOverallControlPlaneMemory index=0 msg="Evaluating rule failed" rule="alert: HighOverallControlPlaneMemory\nexpr: (1 - sum(node_memory_MemFree_bytes + node_memory_Buffers_bytes + node_memory_Cached_bytes\n and on(instance) label_replace(kube_node_role{role=\"master\"}, \"instance\", \"$1\",\n \"node\", \"(.+)\")) / sum(node_memory_MemTotal_bytes and on(instance) label_replace(kube_node_role{role=\"master\"},\n \"instance\", \"$1\", \"node\", \"(.+)\"))) * 100 > 60\nfor: 1h\nlabels:\n severity: warning\nannotations:\n description: Given three control plane nodes, the overall memory utilization may\n only be about 2/3 of all available capacity. This is because if a single control\n plane node fails, the kube-apiserver and etcd my be slow to respond. To fix this,\n increase memory of the control plane nodes.\n summary: Memory utilization across all control plane nodes is high, and could impact\n responsiveness and stability.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.859Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-prometheus-k8s-thanos-sidecar-rules-f2bd2ced-9729-4038-8ee2-f839c36accd3.yaml group=thanos-sidecar name=ThanosSidecarBucketOperationsFailed index=0 msg="Evaluating rule failed" rule="alert: ThanosSidecarBucketOperationsFailed\nexpr: sum by(namespace, job, instance) (rate(thanos_objstore_bucket_operation_failures_total{job=~\"prometheus-(k8s|user-workload)-thanos-sidecar\"}[5m]))\n > 0\nfor: 1h\nlabels:\n severity: warning\nannotations:\n description: Thanos Sidecar {{$labels.instance}} in {{$labels.namespace}} bucket\n operations are failing\n summary: Thanos Sidecar bucket operations are failing\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.860Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-image-registry-image-registry-rules-1bec7069-c165-4a65-9cf8-845d835a1489.yaml group=imageregistry.operations.rules name=imageregistry:operations_count:sum index=0 msg="Evaluating rule failed" rule="record: imageregistry:operations_count:sum\nexpr: label_replace(label_replace(sum by(operation) (imageregistry_request_duration_seconds_count{operation=\"BlobStore.ServeBlob\"}),\n \"operation\", \"get\", \"operation\", \"(.+)\"), \"resource_type\", \"blob\", \"resource_type\",\n \"\")\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.550Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-api-machine-api-operator-prometheus-rules-83d9c525-dba6-4d07-addc-a71ae36a3676.yaml group=machine-not-yet-deleted name=MachineNotYetDeleted index=0 msg="Evaluating rule failed" rule="alert: MachineNotYetDeleted\nexpr: sum by(name, namespace) (avg_over_time(mapi_machine_created_timestamp_seconds{phase=\"Deleting\"}[15m]))\n > 0\nfor: 6h\nlabels:\n severity: warning\nannotations:\n description: |\n The machine is not properly deleting, this may be due to a configuration issue with the\n infrastructure provider, or because workloads on the node have PodDisruptionBudgets or\n long termination periods which are preventing deletion.\n summary: machine {{ $labels.name }} has been in Deleting phase for more than 6 hours\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.775Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-cluster-version-cluster-version-operator-88e8413a-5a1f-4428-a16b-2bb3a5e22a99.yaml group=cluster-version name=ClusterVersionOperatorDown index=0 msg="Evaluating rule failed" rule="alert: ClusterVersionOperatorDown\nexpr: absent(up{job=\"cluster-version-operator\"} == 1)\nfor: 10m\nlabels:\n severity: critical\nannotations:\n description: The operator may be down or disabled. The cluster will not be kept\n up to date and upgrades will not be possible. Inspect the openshift-cluster-version\n namespace for events or changes to the cluster-version-operator deployment or\n pods to diagnose and repair. {{ with $console_url := \"console_url\" | query }}{{\n if ne (len (label \"url\" (first $console_url ) ) ) 0}} For more information refer\n to {{ label \"url\" (first $console_url ) }}/k8s/cluster/projects/openshift-cluster-version.{{\n end }}{{ end }}\n summary: Cluster version operator has disappeared from Prometheus target discovery.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:50.075Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-kube-apiserver-slos-3009928a-abf6-4fc6-80dd-61daf8a50bc3.yaml group=kube-apiserver-slos name=KubeAPIErrorBudgetBurn index=0 msg="Evaluating rule failed" rule="alert: KubeAPIErrorBudgetBurn\nexpr: sum(apiserver_request:burnrate1h) > (14.4 * 0.01) and sum(apiserver_request:burnrate5m)\n > (14.4 * 0.01)\nfor: 2m\nlabels:\n long: 1h\n severity: critical\n short: 5m\nannotations:\n description: The API server is burning too much error budget. This alert fires when\n too many requests are failing with high latency. Use the 'API Performance' monitoring\n dashboards to narrow down the request states and latency. The 'etcd' monitoring\n dashboards also provides metrics to help determine etcd stability and performance.\n summary: The API server is burning too much error budget.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:50.091Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-node-exporter-rules-6b1713a2-6686-4ac4-94b9-3fbb5804293c.yaml group=node-exporter name=NodeFilesystemSpaceFillingUp index=0 msg="Evaluating rule failed" rule="alert: NodeFilesystemSpaceFillingUp\nexpr: (node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"} / node_filesystem_size_bytes{fstype!=\"\",job=\"node-exporter\"}\n * 100 < 15 and predict_linear(node_filesystem_avail_bytes{fstype!=\"\",job=\"node-exporter\"}[6h],\n 24 * 60 * 60) < 0 and node_filesystem_readonly{fstype!=\"\",job=\"node-exporter\"} ==\n 0)\nfor: 1h\nlabels:\n severity: warning\nannotations:\n description: Filesystem on {{ $labels.device }} at {{ $labels.instance }} has only\n {{ printf \"%.2f\" $value }}% available space left and is filling up.\n runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/NodeFilesystemSpaceFillingUp.md\n summary: Filesystem is predicted to run out of space within the next 24 hours.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.738Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-cpu-aggregation-8af4500e-cc64-4601-b2b8-0169928a5900.yaml group=cpu-aggregation.rules name=cluster:usage:kube_node_ready:avg5m index=0 msg="Evaluating rule failed" rule="record: cluster:usage:kube_node_ready:avg5m\nexpr: avg_over_time((((count((max by(node) (up{job=\"kubelet\",metrics_path=\"/metrics\"}\n == 1) and max by(node) (kube_node_status_condition{condition=\"Ready\",status=\"true\"}\n == 1) and min by(node) (kube_node_spec_unschedulable == 0))) / scalar(count(min\n by(node) (kube_node_spec_unschedulable == 0))))))[5m:1s])\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:48.355Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-4cc4656f-dce5-4e7e-85cf-80c894909cb6.yaml group=kubernetes-system name=KubeClientErrors index=0 msg="Evaluating rule failed" rule="alert: KubeClientErrors\nexpr: (sum by(cluster, instance, job, namespace) (rate(rest_client_requests_total{code=~\"5..\"}[5m]))\n / sum by(cluster, instance, job, namespace) (rate(rest_client_requests_total[5m])))\n > 0.01\nfor: 15m\nlabels:\n severity: warning\nannotations:\n description: Kubernetes API server client '{{ $labels.job }}/{{ $labels.instance\n }}' is experiencing {{ $value | humanizePercentage }} errors.'\n summary: Kubernetes API server client is experiencing errors.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.798Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-insights-insights-prometheus-rules-a0a78a55-5c89-4507-8e81-d1c7e2381b56.yaml group=insights name=InsightsDisabled index=0 msg="Evaluating rule failed" rule="alert: InsightsDisabled\nexpr: cluster_operator_conditions{condition=\"Disabled\",name=\"insights\"} == 1\nfor: 5m\nlabels:\n severity: info\nannotations:\n description: 'Insights operator is disabled. In order to enable Insights and benefit\n from recommendations specific to your cluster, please follow steps listed in the\n documentation: https://docs.openshift.com/container-platform/latest/support/remote_health_monitoring/enabling-remote-health-reporting.html'\n summary: Insights operator is disabled.\n" err="query timed out in query execution"
ts=2022-09-26T17:34:48.055Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-api-usage-79127854-2086-4349-bd6f-66d56b3852eb.yaml group=pre-release-lifecycle name=APIRemovedInNextReleaseInUse index=0 msg="Evaluating rule failed" rule="alert: APIRemovedInNextReleaseInUse\nexpr: group by(group, version, resource) (apiserver_requested_deprecated_apis{removed_release=\"1.25\"})\n and (sum by(group, version, resource) (rate(apiserver_request_total{system_client!=\"cluster-policy-controller\",system_client!=\"kube-controller-manager\"}[4h])))\n > 0\nfor: 1h\nlabels:\n namespace: openshift-kube-apiserver\n severity: info\nannotations:\n description: Deprecated API that will be removed in the next version is being used.\n Removing the workload that is using the {{ $labels.group }}.{{ $labels.version\n }}/{{ $labels.resource }} API might be necessary for a successful upgrade to the\n next cluster version. Refer to `oc get apirequestcounts {{ $labels.resource }}.{{\n $labels.version }}.{{ $labels.group }} -o yaml` to identify the workload.\n summary: Deprecated API that will be removed in the next version is being used.\n" err="expanding series: context deadline exceeded"
ts=2022-09-26T17:34:49.071Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-prow-rules-7a06677b-b483-4f43-a97f-9ed9b3b32d1a.yaml group=prow name=prow:pod index=0 msg="Evaluating rule failed" rule="record: prow:pod\nexpr: max by(namespace, pod, node, pod_ip) (kube_pod_info) * on(namespace, pod) group_left(phase)\n (kube_pod_status_phase{namespace=\"ci\"} == 1)\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:47.762Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-machine-api-machine-api-operator-prometheus-rules-83d9c525-dba6-4d07-addc-a71ae36a3676.yaml group=machine-health-check-unterminated-short-circuit name=MachineHealthCheckUnterminatedShortCircuit index=0 msg="Evaluating rule failed" rule="alert: MachineHealthCheckUnterminatedShortCircuit\nexpr: mapi_machinehealthcheck_short_circuit == 1\nfor: 30m\nlabels:\n severity: warning\nannotations:\n description: |\n The number of unhealthy machines has exceeded the `maxUnhealthy` limit for the check, you should check\n the status of machines in the cluster.\n summary: machine health check {{ $labels.name }} has been disabled by short circuit\n for more than 30 minutes\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:34:50.720Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-podsecurity-2330843d-09aa-4028-888d-f04f703e47bb.yaml group=pod-security-violation name=PodSecurityViolation index=0 msg="Evaluating rule failed" rule="alert: PodSecurityViolation\nexpr: sum by(policy_level) (increase(pod_security_evaluations_total{decision=\"deny\",mode=\"audit\",resource=\"pod\"}[1d]))\n > 0\nlabels:\n namespace: openshift-kube-apiserver\n severity: info\nannotations:\n description: A workload (pod, deployment, deamonset, ...) was created somewhere\n in the cluster but it did not match the PodSecurity \"{{ $labels.policy_level }}\"\n profile defined by its namespace either via the cluster-wide configuration (which\n triggers on a \"restricted\" profile violations) or by the namespace local Pod Security\n labels. Refer to Kubernetes documentation on Pod Security Admission to learn more\n about these violations.\n summary: One or more workloads users created in the cluster don't match their Pod\n Security profile\n" err="expanding series: context deadline exceeded"
ts=2022-09-26T17:35:04.644Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-4cc4656f-dce5-4e7e-85cf-80c894909cb6.yaml group=kubernetes-storage name=KubePersistentVolumeFillingUp index=0 msg="Evaluating rule failed" rule="alert: KubePersistentVolumeFillingUp\nexpr: (kubelet_volume_stats_available_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}\n / kubelet_volume_stats_capacity_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"})\n < 0.03 and kubelet_volume_stats_used_bytes{job=\"kubelet\",metrics_path=\"/metrics\",namespace=~\"(openshift-.*|kube-.*|default)\"}\n > 0 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_access_mode{access_mode=\"ReadOnlyMany\",namespace=~\"(openshift-.*|kube-.*|default)\"}\n == 1 unless on(namespace, persistentvolumeclaim) kube_persistentvolumeclaim_labels{label_alerts_k8s_io_kube_persistent_volume_filling_up=\"disabled\",namespace=~\"(openshift-.*|kube-.*|default)\"}\n == 1\nfor: 1m\nlabels:\n severity: critical\nannotations:\n description: The PersistentVolume claimed by {{ $labels.persistentvolumeclaim }}\n in Namespace {{ $labels.namespace }} is only {{ $value | humanizePercentage }}\n free.\n runbook_url: https://github.com/openshift/runbooks/blob/master/alerts/cluster-monitoring-operator/KubePersistentVolumeFillingUp.md\n summary: PersistentVolume is filling up.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:35:04.651Z caller=notifier.go:525 level=error component=notifier alertmanager=https://10.129.24.35:9095/api/v2/alerts count=1 msg="Error sending alert" err="Post \"https://10.129.24.35:9095/api/v2/alerts\": context deadline exceeded"
ts=2022-09-26T17:35:04.855Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-kube-apiserver-slos-3009928a-abf6-4fc6-80dd-61daf8a50bc3.yaml group=kube-apiserver.rules name=apiserver_request:burnrate3d index=6 msg="Evaluating rule failed" rule="record: apiserver_request:burnrate3d\nexpr: label_replace(sum(rate(apiserver_request_total{code=~\"5..\",job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))\n / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))),\n \"type\", \"error\", \"_none_\", \"\") or label_replace((sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",scope=\"resource\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[3d]))\n - (sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.1\",scope=\"resource\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[3d]))\n or vector(0))) / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[3d]))),\n \"type\", \"slow-resource\", \"_none_\", \"\") or label_replace((sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",scope=\"namespace\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[3d]))\n - sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"0.5\",scope=\"namespace\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[3d])))\n / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",subresource!~\"proxy|log|exec\",verb=~\"LIST|GET\"}[3d]))),\n \"type\", \"slow-namespace\", \"_none_\", \"\") or label_replace((sum(rate(apiserver_request_duration_seconds_count{job=\"apiserver\",scope=\"cluster\",verb=~\"LIST|GET\"}[3d]))\n - sum(rate(apiserver_request_duration_seconds_bucket{job=\"apiserver\",le=\"5\",scope=\"cluster\",verb=~\"LIST|GET\"}[3d])))\n / scalar(sum(rate(apiserver_request_total{job=\"apiserver\",verb=~\"LIST|GET\"}[3d]))),\n \"type\", \"slow-cluster\", \"_none_\", \"\")\nlabels:\n verb: read\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:35:04.965Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-monitoring-kubernetes-monitoring-rules-4cc4656f-dce5-4e7e-85cf-80c894909cb6.yaml group=node.rules name=node_namespace_pod:kube_pod_info: index=0 msg="Evaluating rule failed" rule="record: 'node_namespace_pod:kube_pod_info:'\nexpr: topk by(cluster, namespace, pod) (1, max by(cluster, node, namespace, pod) (label_replace(kube_pod_info{job=\"kube-state-metrics\",node!=\"\"},\n \"pod\", \"$1\", \"pod\", \"(.*)\")))\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:35:05.880Z caller=manager.go:687 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-kube-apiserver-slos-3009928a-abf6-4fc6-80dd-61daf8a50bc3.yaml group=kube-apiserver.rules name=apiserver_request:burnrate1h index=8 msg="Error on ingesting out-of-order result from rule evaluation" numDropped=1
ts=2022-09-26T17:35:05.948Z caller=manager.go:687 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-kube-apiserver-slos-3009928a-abf6-4fc6-80dd-61daf8a50bc3.yaml group=kube-apiserver.rules name=apiserver_request:burnrate30m index=10 msg="Error on ingesting out-of-order result from rule evaluation" numDropped=1
ts=2022-09-26T17:35:06.304Z caller=manager.go:634 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-operator-kube-apiserver-operator-9b370559-6f09-4f05-bc6d-cbddfa9c44e1.yaml group=cluster-version name=TechPreviewNoUpgrade index=0 msg="Evaluating rule failed" rule="alert: TechPreviewNoUpgrade\nexpr: cluster_feature_set{name!=\"\",namespace=\"openshift-kube-apiserver-operator\"}\n == 0\nfor: 10m\nlabels:\n severity: warning\nannotations:\n description: Cluster has enabled Technology Preview features that cannot be undone\n and will prevent upgrades. The TechPreviewNoUpgrade feature set is not recommended\n on production clusters.\n summary: Cluster has enabled tech preview features that will prevent upgrades.\n" err="query timed out in expression evaluation"
ts=2022-09-26T17:35:09.367Z caller=manager.go:687 level=warn component="rule manager" file=/etc/prometheus/rules/prometheus-k8s-rulefiles-0/openshift-kube-apiserver-kube-apiserver-slos-3009928a-abf6-4fc6-80dd-61daf8a50bc3.yaml group=kube-apiserver.rules name=apiserver_request:burnrate6h index=13 msg="Error on ingesting out-of-order result from rule evaluation" numDropped=1
ts=2022-09-26T17:35:09.560Z caller=manager.go:961 level=info component="rule manager" msg="Rule manager stopped"
ts=2022-09-26T17:35:09.560Z caller=main.go:899 level=info msg="Stopping scrape manager..."
ts=2022-09-26T17:35:11.779Z caller=main.go:891 level=info msg="Scrape manager stopped"
ts=2022-09-26T17:35:12.033Z caller=db.go:829 level=error component=tsdb msg="compaction failed" err="compact head: persist head block: 2 errors: populate block: context canceled; context canceled"
ts=2022-09-26T17:35:12.093Z caller=notifier.go:599 level=info component=notifier msg="Stopping notification manager..."
ts=2022-09-26T17:35:12.093Z caller=main.go:1120 level=info msg="Notifier manager stopped"
ts=2022-09-26T17:35:12.093Z caller=main.go:1132 level=info msg="See you next time!"