-
Task
-
Resolution: Done
-
Normal
-
None
-
None
-
None
-
3
-
False
-
-
False
-
-
-
3
-
Observability Sprint 38, Observability Sprint 39
-
None
https://polarion.engineering.redhat.com/polarion/#/project/RHACM4K/workitem?id=RHACM4K-43019
(See Example MCO CR below)
- Deploy MCO CR and verify all pods are running
Acceptance Criteria
- e2e tests follow the correct naming pattern: `<Polarion test Id>: Observability: <Test case name>`
- e2e tests include relevant tagging (@e2e, etc.); see the following: https://github.com/stolostron/acmqe-autotest
- e2e tests are marked as automated in Polarion
Scenario 1: Update log level for receive and verify it is set on receive pod
# Scenario 1 fragment: container args override for thanos-receive with
# --log.level=debug. Nest this under `spec` of the MultiClusterObservability CR.
advanced:
  receive:
    containers:
      - args:
          - receive
          - --log.level=debug
          - --log.format=logfmt
          - --grpc-address=0.0.0.0:10901
          - --http-address=0.0.0.0:10902
          - --remote-write.address=0.0.0.0:19291
          - --receive.replication-factor=3
          - --tsdb.path=/var/thanos/receive
          - --tsdb.retention=5d
          - --label=replica="$(NAME)"
          - --label=receive="true"
          - --objstore.config=$(OBJSTORE_CONFIG)
          - --receive.local-endpoint=$(NAME).observability-thanos-receive-default.$(NAMESPACE).svc.cluster.local:10901
          - --receive.hashrings-file=/var/lib/thanos-receive/hashrings.json
        name: thanos-receive
Scenario 2: Update log level for compact and verify it is set on the compactor pod
Alternatively, update other compactor parameters that large-scale environments use and verify they are reflected on the compactor pod
# Scenario 2 fragment: container args override for thanos-compact with
# --log.level=debug and custom retention/delete-delay values.
# Nest this under `spec.advanced` (see the Example MCO CR).
compact:
  containers:
    - args:
        - compact
        - --wait
        - --log.level=debug
        - --log.format=logfmt
        - --objstore.config=$(OBJSTORE_CONFIG)
        - --data-dir=/var/thanos/compact
        - --debug.accept-malformed-index
        - --retention.resolution-raw=6d
        - --retention.resolution-5m=15d
        - --retention.resolution-1h=31d
        - --delete-delay=50h
        - --compact.concurrency=1
        - --downsample.concurrency=1
        - --deduplication.replica-label=replica
      name: thanos-compact
Example MCO CR:
# Example MultiClusterObservability CR that overrides the container args of the
# Thanos components (receive, store, query, rule, compact, query-frontend)
# through `spec.advanced.<component>.containers[].args`.
apiVersion: observability.open-cluster-management.io/v1beta2
kind: MultiClusterObservability
metadata:
  name: observability
spec:
  observabilityAddonSpec: {}
  storageConfig:
    metricObjectStorage:
      name: thanos-object-storage
      key: thanos.yaml
  advanced:
    receive:
      containers:
        - name: thanos-receive
          args:
            - receive
            - --log.level=debug
            - --log.format=logfmt
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            - --remote-write.address=0.0.0.0:19291
            - --receive.replication-factor=1
            - --tsdb.path=/var/thanos/receive
            - --tsdb.retention=4d
            - --label=replica="$(NAME)"
            - --label=receive="true"
            - --objstore.config=$(OBJSTORE_CONFIG)
            # Service name uses the `observability` prefix, consistent with the
            # CR name and with the receive endpoint listed in the query args.
            - --receive.local-endpoint=$(NAME).observability-thanos-receive-default.$(NAMESPACE).svc.cluster.local:10901
            - --receive.hashrings-file=/var/lib/thanos-receive/hashrings.json
    store:
      containers:
        - name: thanos-store
          args:
            - store
            - --log.level=debug
            - --log.format=logfmt
            - --data-dir=/var/thanos/store
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            - --objstore.config=$(OBJSTORE_CONFIG)
            - --ignore-deletion-marks-delay=24h
            # The next three args are multi-line values passed as one argument each.
            - |-
              --index-cache.config="config":
                "addresses":
                - "dnssrv+_client._tcp.observability-thanos-store-memcached.open-cluster-management-observability.svc"
                "dns_provider_update_interval": "10s"
                "max_async_buffer_size": 100000
                "max_async_concurrency": 100
                "max_get_multi_batch_size": 1000
                "max_get_multi_concurrency": 900
                "max_idle_connections": 1000
                "max_item_size": "1MiB"
                "timeout": "2s"
              "type": "memcached"
            - |-
              --store.caching-bucket.config="blocks_iter_ttl": "5m"
              "chunk_object_attrs_ttl": "24h"
              "chunk_subrange_size": 16000
              "chunk_subrange_ttl": "24h"
              "config":
                "addresses":
                - "dnssrv+_client._tcp.observability-thanos-store-memcached.open-cluster-management-observability.svc"
                "dns_provider_update_interval": "10s"
                "max_async_buffer_size": 100000
                "max_async_concurrency": 100
                "max_get_multi_batch_size": 1000
                "max_get_multi_concurrency": 900
                "max_idle_connections": 1000
                "max_item_size": "1MiB"
                "timeout": "2s"
              "max_chunks_get_range_requests": 3
              "metafile_content_ttl": "24h"
              "metafile_doesnt_exist_ttl": "15m"
              "metafile_exists_ttl": "2h"
              "metafile_max_size": "1MiB"
              "type": "memcached"
            - |
              --selector.relabel-config=
                - action: hashmod
                  source_labels: ["__block_id"]
                  target_label: shard
                  modulus: 2
                - action: keep
                  source_labels: ["shard"]
                  regex: 0
    query:
      containers:
        - name: thanos-query
          args:
            - query
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:9090
            - --log.level=info
            - --log.format=logfmt
            - --query.replica-label=prometheus_replica
            - --query.replica-label=rule_replica
            - --query.replica-label=replica
            - --store=dnssrv+_grpc._tcp.observability-thanos-rule.open-cluster-management-observability.svc.cluster.local
            - --store=dnssrv+_grpc._tcp.observability-thanos-store-shard-0.open-cluster-management-observability.svc.cluster.local
            - --store=dnssrv+_grpc._tcp.observability-thanos-store-shard-1.open-cluster-management-observability.svc.cluster.local
            - --store=dnssrv+_grpc._tcp.observability-thanos-receive-default.open-cluster-management-observability.svc.cluster.local
            - --query.timeout=15m
            - --query.lookback-delta=500s
            - --query.auto-downsampling
    rule:
      containers:
        - name: thanos-rule
          args:
            - rule
            - --log.level=debug
            - --log.format=logfmt
            - --grpc-address=0.0.0.0:10901
            - --http-address=0.0.0.0:10902
            - --objstore.config=$(OBJSTORE_CONFIG)
            - --data-dir=/var/thanos/rule
            - --label=rule_replica="$(NAME)"
            - --alert.label-drop=rule_replica
            - --tsdb.retention=24h
            - --tsdb.block-duration=2h
            - --query=dnssrv+_http._tcp.observability-thanos-query.open-cluster-management-observability.svc.cluster.local
            - --alertmanagers.config-file=/etc/thanos/config/thanos-ruler-config/config.yaml
            - --rule-file=/etc/thanos/rules/thanos-ruler-default-rules/default_rules.yaml
    compact:
      containers:
        - name: thanos-compact
          args:
            - compact
            - --wait
            - --log.level=debug
            - --log.format=logfmt
            - --objstore.config=$(OBJSTORE_CONFIG)
            - --data-dir=/var/thanos/compact
            - --debug.accept-malformed-index
            - --retention.resolution-raw=30d
            - --retention.resolution-5m=180d
            - --retention.resolution-1h=0d
            - --delete-delay=48h
            - --compact.concurrency=1
            - --downsample.concurrency=1
            - --deduplication.replica-label=replica
    queryFrontend:
      containers:
        - name: thanos-query-frontend
          args:
            - query-frontend
            - --log.level=debug
            - --log.format=logfmt
            - --query-frontend.compress-responses
            - --http-address=0.0.0.0:9090
            - --query-frontend.downstream-url=http://observability-thanos-query.open-cluster-management-observability.svc.cluster.local.:9090
            - --query-range.split-interval=24h
            - --labels.split-interval=24h
            - --query-range.max-retries-per-request=0
            - --labels.max-retries-per-request=0
            - --query-frontend.log-queries-longer-than=5s
            # Both response caches below are multi-line values, one argument each.
            - |-
              --query-range.response-cache-config="config":
                "addresses":
                - "dnssrv+_client._tcp.observability-thanos-query-frontend-memcached.open-cluster-management-observability.svc"
                "dns_provider_update_interval": "10s"
                "max_async_buffer_size": 10000
                "max_async_concurrency": 20
                "max_get_multi_batch_size": 0
                "max_get_multi_concurrency": 100
                "max_idle_connections": 100
                "max_item_size": "1MiB"
                "timeout": "500ms"
              "type": "memcached"
            - |-
              --labels.response-cache-config="config":
                "addresses":
                - "dnssrv+_client._tcp.observability-thanos-query-frontend-memcached.open-cluster-management-observability.svc"
                "dns_provider_update_interval": "10s"
                "max_async_buffer_size": 10000
                "max_async_concurrency": 20
                "max_get_multi_batch_size": 0
                "max_get_multi_concurrency": 100
                "max_idle_connections": 100
                "max_item_size": "1MiB"
                "timeout": "500ms"
              "type": "memcached"