-
Bug
-
Resolution: Unresolved
-
Undefined
-
None
-
rhos-18.0.13
-
False
-
-
False
-
?
-
None
-
-
-
-
Moderate
We require the ability to customize the deployed Node Exporter collectors within the EDPM Telemetry configuration for critical business monitoring needs.
Currently, the collector list is hardcoded in the EDPM Ansible role ([1]), which prevents us from gathering specific metrics essential for our environment.
Bug impact
- Impact: It is not possible to gather the metrics required for business operations and performance monitoring.
Known workaround
The customer has implemented a temporary workaround by creating a custom OpenStackDataPlaneService. This service overrides the default Node Exporter deployment with a new container definition that enables the required collectors and options (e.g., specific systemd units, sysctl includes, and disabling of unneeded collectors).
# Custom OpenStackDataPlaneService used as a workaround: it writes its own
# node_exporter.json container definition (custom collector flags) and then
# redeploys and restarts the node-exporter container on the EDPM nodes.
apiVersion: dataplane.openstack.org/v1beta1
kind: OpenStackDataPlaneService
metadata:
  name: xxxx-node-exporter-service
  namespace: openstack
spec:
  playbookContents: |
    - hosts: all
      become: true
      tasks:
        # Host directory that is bind-mounted (read-only) into the container
        # and passed to --collector.textfile.directory below.
        - name: Prepare textfile directory
          ansible.builtin.file:
            state: directory
            path: /var/lib/node_exporter/textfile_collector/
            recurse: true
            mode: '0755'
            owner: root
            group: root

        # Based on upstream https://github.com/openstack-k8s-operators/edpm-ansible/blob/main/roles/edpm_telemetry/templates/node_exporter.json.j2
        # Modified for XXXXX needs
        - name: Write node_exporter.json config file
          ansible.builtin.copy:
            dest: /var/lib/openstack/config/telemetry/node_exporter.json
            mode: '0644'
            content: |
              {
                "image": "{{ edpm_telemetry_node_exporter_image }}",
                "restart": "always",
                "recreate": true,
                "user": "root",
                "privileged": true,
                "ports": ["9100:9100"],
                "command": [
                  "--web.config.file=/etc/node_exporter/node_exporter.yaml",
                  "--collector.systemd",
                  "--collector.systemd.unit-include=(edpm_.*|ovs.*|openvswitch|virt.*|rsyslog|auditbeat|qualys-cloud-agent|puppet)\\.service",
                  "--web.disable-exporter-metrics",
                  "--collector.netclass.ignored-devices=^(lo|tap.+|genev.+|ovs.+|br.+)$",
                  "--collector.netdev.device-exclude=^(lo|tap.+|genev.+|ovs.+|br.+)$",
                  "--collector.sysctl",
                  "--collector.sysctl.include=fs.aio-nr",
                  "--collector.sysctl.include=fs.aio-max-nr",
                  "--collector.textfile.directory=/var/lib/node_exporter/textfile_collector",
                  "--no-collector.cpufreq",
                  "--no-collector.entropy",
                  "--no-collector.fibrechannel",
                  "--no-collector.infiniband",
                  "--no-collector.thermal_zone",
                  "--no-collector.time",
                  "--no-collector.timex",
                  "--no-collector.stat",
                  "--no-collector.hwmon",
                  "--no-collector.selinux",
                  "--no-collector.powersupplyclass",
                  "--no-collector.pressure",
                  "--no-collector.rapl"
                ],
                "net": "host",
                "environment": {
                  "OS_ENDPOINT_TYPE":"internal"
                },
                "healthcheck": {
                  "test": "/openstack/healthcheck node_exporter",
                  "mount": "/var/lib/openstack/healthchecks/node_exporter"
                },
                "volumes": [
                  "/var/lib/openstack/config/telemetry/node_exporter.yaml:/etc/node_exporter/node_exporter.yaml:z",
                  "/var/lib/openstack/certs/telemetry/default:/etc/node_exporter/tls:z",
                  "/var/run/dbus/system_bus_socket:/var/run/dbus/system_bus_socket:rw",
                  "/var/lib/node_exporter/textfile_collector:/var/lib/node_exporter/textfile_collector:ro,z"
                ]
              }

        # Only the node_exporter.json definition written above is matched by
        # the config pattern; orphan cleanup is turned off for this run.
        - name: Deploy node-exporter container using upstream method
          ansible.builtin.include_role:
            name: osp.edpm.edpm_container_manage
          vars:
            edpm_container_manage_config: "/var/lib/openstack/config/telemetry"
            edpm_container_manage_healthcheck_disabled: true
            edpm_container_manage_clean_orphans: false
            edpm_container_manage_config_patterns: "node_exporter.json"

        - name: Restart node-exporter container
          become: true
          ansible.builtin.systemd:
            state: restarted
            name: "edpm_node_exporter.service"