From 262db0e6d3e524107152844014049148cf553fcd Mon Sep 17 00:00:00 2001 From: Alex Welsh Date: Wed, 17 Jun 2026 08:49:57 +0100 Subject: [PATCH] Remove hostname from AIO alert matches In the past we would reboot AIOs and set a known hostname, so we could match on controller0. The reboot is no longer required, so AIOs now get random hostnames and matching on controller0 no longer works. --- stackhpc_cloud_tests/monitoring/test_prometheus.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/stackhpc_cloud_tests/monitoring/test_prometheus.py b/stackhpc_cloud_tests/monitoring/test_prometheus.py index 8a1a160..05c74b8 100644 --- a/stackhpc_cloud_tests/monitoring/test_prometheus.py +++ b/stackhpc_cloud_tests/monitoring/test_prometheus.py @@ -62,14 +62,14 @@ def test_prometheus_alerts_inactive(prom): # TODO - find a way of configuring this for SCT running in other environments. aio_alerts_to_ignore = [ # We know our volumes are small. - { "alertname": "StorageFillingUp", "instance": "controller0" }, - { "alertname": "ElasticsearchDiskSpaceLow", "instance": "controller0" }, + { "alertname": "StorageFillingUp" }, + { "alertname": "ElasticsearchDiskSpaceLow" }, # This is probably due to storage space.. - { "alertname": "ElasticsearchClusterYellow", "instance": "controller0" }, + { "alertname": "ElasticsearchClusterYellow" }, # ..or because we're running in a single instance and it wants to be clustered across multiple nodes. - { "alertname": "ElasticsearchUnassignedShards", "instance": "controller0" }, + { "alertname": "ElasticsearchUnassignedShards" }, # It's a small AIO! - { "alertname": "LowMemory", "instance": "controller0" }, + { "alertname": "LowMemory" }, # It's only one node and expects three, see https://github.com/stackhpc/stackhpc-kayobe-config/pull/1579 { "alertname": "RabbitMQNodeDown" }, # This is probably because Tempest runs before pytest so the container has been recently stopped.