#!/bin/bash
#
# Sets up a throw-away MinIO object store and enables ACM observability on it:
#   1. namespace open-cluster-management-observability
#   2. MinIO PVC, Deployment and Service
#   3. thanos-object-storage Secret pointing Thanos at that MinIO instance
#   4. MultiClusterObservability CR referencing the Secret
#   5. thanos-ruler-custom-rules ConfigMap with test alert rules
#
# Requires a logged-in `oc` session with permission to create these objects.
# Every step uses `oc apply`, so the script can be re-run safely.
#
# NOTE: all heredocs use a quoted delimiter (<<'EOF') on purpose. With an
# unquoted delimiter bash expands `$labels` / `$value` inside the Prometheus
# rule templates to empty strings before `oc` ever sees the manifest.

set -euo pipefail

# Create the namespace idempotently; a plain `oc create ns` fails on re-runs.
oc create ns open-cluster-management-observability --dry-run=client -o yaml \
  | oc apply -f -

# Persistent storage for MinIO. Applied before the Deployment that mounts it.
# The storage class is cluster-specific; adjust if gp3-csi is not available.
oc apply -f - <<'EOF'
apiVersion: v1
kind: PersistentVolumeClaim
metadata:
  labels:
    app.kubernetes.io/name: minio
  name: minio
  namespace: open-cluster-management-observability
spec:
  storageClassName: gp3-csi
  accessModes:
    - ReadWriteOnce
  resources:
    requests:
      storage: "1Gi"
EOF

# MinIO server. The container command pre-creates the /storage/thanos
# directory, which matches the "thanos" bucket named in the Secret below.
# The access/secret key values here must match the ones in that Secret.
oc apply -f - <<'EOF'
apiVersion: apps/v1
kind: Deployment
metadata:
  name: minio
  namespace: open-cluster-management-observability
  labels:
    app.kubernetes.io/name: minio
spec:
  replicas: 1
  selector:
    matchLabels:
      app.kubernetes.io/name: minio
  strategy:
    type: Recreate
  template:
    metadata:
      labels:
        app.kubernetes.io/name: minio
    spec:
      containers:
        - command:
            - /bin/sh
            - -c
            - mkdir -p /storage/thanos && /usr/bin/minio server /storage
          env:
            - name: MINIO_ACCESS_KEY
              value: minio
            - name: MINIO_SECRET_KEY
              value: minio123
          image: quay.io/minio/minio:RELEASE.2021-08-25T00-41-18Z
          name: minio
          ports:
            - containerPort: 9000
              protocol: TCP
          volumeMounts:
            - mountPath: /storage
              name: storage
      volumes:
        - name: storage
          persistentVolumeClaim:
            claimName: minio
EOF

# Service that makes the MinIO pod reachable as "minio:9000", the endpoint the
# Thanos Secret below refers to. The original script applied that Secret twice
# and never created this Service.
oc apply -f - <<'EOF'
apiVersion: v1
kind: Service
metadata:
  labels:
    app.kubernetes.io/name: minio
  name: minio
  namespace: open-cluster-management-observability
spec:
  type: ClusterIP
  selector:
    app.kubernetes.io/name: minio
  ports:
    - port: 9000
      protocol: TCP
      targetPort: 9000
EOF

# Thanos object-storage configuration (S3 API served by the MinIO above).
oc apply -f - <<'EOF'
apiVersion: v1
kind: Secret
metadata:
  name: thanos-object-storage
  namespace: open-cluster-management-observability
type: Opaque
stringData:
  thanos.yaml: |
    type: s3
    config:
      bucket: "thanos"
      endpoint: "minio:9000"
      insecure: true
      access_key: "minio"
      secret_key: "minio123"
EOF

# Enable observability, using the Secret's thanos.yaml key for metric storage.
oc apply -f - <<'EOF'
apiVersion: observability.open-cluster-management.io/v1beta2
kind: MultiClusterObservability
metadata:
  name: observability
spec:
  observabilityAddonSpec: {}
  storageConfig:
    metricObjectStorage:
      name: thanos-object-storage
      key: thanos.yaml
EOF

# Custom alert rules. Two always-firing Watchdog alerts (expr: vector(1)) plus
# a CPU alert whose threshold of 0 makes it fire whenever the metric exists.
# Server-populated metadata (creationTimestamp, resourceVersion, uid and the
# last-applied-configuration annotation) is intentionally omitted: a stale
# resourceVersion/uid makes `oc apply` fail against a different object.
oc apply -f - <<'EOF'
apiVersion: v1
kind: ConfigMap
metadata:
  labels:
    cluster.open-cluster-management.io/backup: ""
  name: thanos-ruler-custom-rules
  namespace: open-cluster-management-observability
data:
  custom_rules.yaml: |
    groups:
      - name: alertrule-testing
        rules:
          - alert: Watchdog
            annotations:
              summary: An alert that should always be firing to certify that Alertmanager is working properly.
              description: This is an alert meant to ensure that the entire alerting pipeline is functional.
            expr: vector(1)
            labels:
              instance: "local"
              cluster: "local"
              clusterID: "111111111"
              severity: info
          - alert: Watchdog-spoke
            annotations:
              summary: An alert that should always be firing to certify that Alertmanager is working properly.
              description: This is an alert meant to ensure that the entire alerting pipeline is functional.
            expr: vector(1)
            labels:
              instance: "spoke"
              cluster: "spoke"
              clusterID: "22222222"
              severity: warn
      - name: cluster-health
        rules:
          - alert: ClusterCPUHealth-jb
            annotations:
              summary: Notify when CPU utilization on a cluster is greater than the defined utilization limit
              description: "The cluster has a high CPU usage: {{ $value }} core for {{ $labels.cluster }} {{ $labels.clusterID }}."
            expr: |
              max(cluster:cpu_usage_cores:sum) by (clusterID, cluster, prometheus) > 0
            labels:
              cluster: "{{ $labels.cluster }}"
              prometheus: "{{ $labels.prometheus }}"
              severity: critical
EOF