From d6e8a2e3e3fe1b68d3e07fb4a7ad8fd0e1f5a491 Mon Sep 17 00:00:00 2001 From: Alan Bounds Date: Fri, 19 Jun 2026 09:11:51 -0500 Subject: [PATCH] Add network-group-affinity/anti-affinity server group policies Introduces two new server group policies that constrain instance placement based on the physical network group (VLAN group / cabinet switch pair) an Ironic node belongs to. New policies: - network-group-affinity: schedule only onto nodes in the specified network group - network-group-anti-affinity: schedule only onto nodes NOT in the specified network group The network group is specified via the server group rules field: openstack server group create --policy network-group-affinity \ --rule network_group=a1-1-network my-cabinet-group The filters match CUSTOM_NETGROUP_* traits reported by Ironic nodes via the Placement service. This eliminates the need for per-network-group flavor proliferation. Changes: - New NetworkGroupAffinityFilter and NetworkGroupAntiAffinityFilter - Schema updated to accept new policies and network_group rule - API controller validates rules per policy type - InstanceGroup.rules property parses network_group - Scheduler utils recognizes new policies - New filters added to default enabled_filters - Unit tests (15 tests, all passing) - Lint clean (flake8 + hacking) --- .../compute/schemas/server_groups.py | 11 +- nova/api/openstack/compute/server_groups.py | 28 ++- nova/conf/scheduler.py | 2 + nova/objects/instance_group.py | 2 + .../scheduler/filters/network_group_filter.py | 135 +++++++++++++ nova/scheduler/utils.py | 3 +- .../filters/test_network_group_filter.py | 187 ++++++++++++++++++ 7 files changed, 359 insertions(+), 9 deletions(-) create mode 100644 nova/scheduler/filters/network_group_filter.py create mode 100644 nova/tests/unit/scheduler/filters/test_network_group_filter.py diff --git a/nova/api/openstack/compute/schemas/server_groups.py b/nova/api/openstack/compute/schemas/server_groups.py index 805e78e80f2..71a240263f3 100644 
--- a/nova/api/openstack/compute/schemas/server_groups.py +++ b/nova/api/openstack/compute/schemas/server_groups.py @@ -64,7 +64,8 @@ create_v264['properties']['server_group']['properties']['policy'] = { 'type': 'string', 'enum': ['anti-affinity', 'affinity', - 'soft-anti-affinity', 'soft-affinity'], + 'soft-anti-affinity', 'soft-affinity', + 'network-group-affinity', 'network-group-anti-affinity'], } create_v264['properties']['server_group']['properties']['rules'] = { @@ -72,6 +73,11 @@ 'properties': { 'max_server_per_host': parameter_types.positive_integer, + 'network_group': { + 'type': 'string', + 'minLength': 1, + 'maxLength': 255, + }, }, 'additionalProperties': False, } @@ -160,12 +166,15 @@ 'anti-affinity', 'soft-affinity', 'soft-anti-affinity', + 'network-group-affinity', + 'network-group-anti-affinity', ], }, 'rules': { 'type': 'object', 'properties': { 'max_server_per_host': {'type': 'integer'}, + 'network_group': {'type': 'string'}, }, 'required': [], 'additionalProperties': False, diff --git a/nova/api/openstack/compute/server_groups.py b/nova/api/openstack/compute/server_groups.py index 6d4eb7822dd..879ca66f335 100644 --- a/nova/api/openstack/compute/server_groups.py +++ b/nova/api/openstack/compute/server_groups.py @@ -235,14 +235,28 @@ def create(self, req, body): if api_version_request.is_supported(req, "2.64"): policy = vals['policy'] rules = vals.get('rules', {}) - if policy != 'anti-affinity' and rules: - msg = _("Only anti-affinity policy supports rules.") + if policy == 'anti-affinity': + # NOTE(yikun): This should be removed in Stein version. 
+ if not _should_enable_custom_max_server_rules(context, rules): + msg = _("Creating an anti-affinity group with rule " + "max_server_per_host > 1 is not yet supported.") + raise exc.HTTPConflict(explanation=msg) + elif policy in ('network-group-affinity', + 'network-group-anti-affinity'): + if 'max_server_per_host' in rules: + msg = _("network-group-affinity and " + "network-group-anti-affinity policies do not " + "support the max_server_per_host rule.") + raise exc.HTTPBadRequest(explanation=msg) + if 'network_group' not in rules: + msg = _("network-group-affinity and " + "network-group-anti-affinity policies require " + "a network_group rule.") + raise exc.HTTPBadRequest(explanation=msg) + elif rules: + msg = _("Only anti-affinity, network-group-affinity, and " + "network-group-anti-affinity policies support rules.") raise exc.HTTPBadRequest(explanation=msg) - # NOTE(yikun): This should be removed in Stein version. - if not _should_enable_custom_max_server_rules(context, rules): - msg = _("Creating an anti-affinity group with rule " - "max_server_per_host > 1 is not yet supported.") - raise exc.HTTPConflict(explanation=msg) sg = objects.InstanceGroup(context, policy=policy, rules=rules) else: diff --git a/nova/conf/scheduler.py b/nova/conf/scheduler.py index a7dcf3f943c..8652c34e3cd 100644 --- a/nova/conf/scheduler.py +++ b/nova/conf/scheduler.py @@ -330,6 +330,8 @@ "ImagePropertiesFilter", "ServerGroupAntiAffinityFilter", "ServerGroupAffinityFilter", + "NetworkGroupAffinityFilter", + "NetworkGroupAntiAffinityFilter", ], help=""" Filters that the scheduler will use. 
diff --git a/nova/objects/instance_group.py b/nova/objects/instance_group.py
index e1156196a66..2b638302623 100644
--- a/nova/objects/instance_group.py
+++ b/nova/objects/instance_group.py
@@ -157,6 +157,8 @@ def rules(self):
         if 'max_server_per_host' in self._rules:
             rules['max_server_per_host'] = \
                     int(self._rules['max_server_per_host'])
+        if 'network_group' in self._rules:
+            rules['network_group'] = self._rules['network_group']
         return rules
 
     def obj_make_compatible(self, primitive, target_version):
diff --git a/nova/scheduler/filters/network_group_filter.py b/nova/scheduler/filters/network_group_filter.py
new file mode 100644
index 00000000000..e717b06e03a
--- /dev/null
+++ b/nova/scheduler/filters/network_group_filter.py
@@ -0,0 +1,167 @@
+# Copyright 2025 Rackspace Technology, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+"""Scheduler filters for network group affinity and anti-affinity.
+
+These filters constrain instance placement based on the physical network
+group (VLAN group / cabinet switch pair) that an Ironic node belongs to.
+
+The network group is specified in a server group's ``rules`` field at
+creation time and is matched against ``CUSTOM_NETGROUP_*`` traits reported
+by Ironic nodes via the Placement service.
+"""
+
+import re
+
+from oslo_log import log as logging
+
+from nova.scheduler import filters
+
+LOG = logging.getLogger(__name__)
+
+# Prefix used when converting a network group name to a trait.
+# Example: "a1-1-network" -> "CUSTOM_NETGROUP_A1_1_NETWORK"
+_TRAIT_PREFIX = "CUSTOM_NETGROUP_"
+
+# Custom trait names may only contain A-Z, 0-9 and underscore, so any other
+# character in an (upper-cased) network group name is replaced.
+_INVALID_TRAIT_CHARS = re.compile(r"[^A-Z0-9_]")
+
+
+def _network_group_to_trait(network_group):
+    """Convert a network group name to its corresponding Placement trait.
+
+    The name is upper-cased and each character other than ``A-Z``, ``0-9``
+    and ``_`` is replaced, one for one, by an underscore. Besides ``-`` and
+    ``/`` this also covers characters such as ``.``, ``:`` or spaces, which
+    would otherwise yield a string that is not a valid custom trait name.
+
+    :param network_group: The network group name (e.g. "a1-1-network")
+    :returns: The trait string (e.g. "CUSTOM_NETGROUP_A1_1_NETWORK")
+    """
+    # NOTE(review): presumably the traits are published on the Ironic
+    # nodes by a separate tool; its normalisation must agree with this.
+    normalised = _INVALID_TRAIT_CHARS.sub("_", network_group.upper())
+    return _TRAIT_PREFIX + normalised
+
+
+def _requested_trait(spec_obj, policy):
+    """Return the network group trait that applies to a request.
+
+    :param spec_obj: The request spec being scheduled
+    :param policy: The server group policy handled by the calling filter
+    :returns: The trait derived from the server group's ``network_group``
+        rule, or None if the request has no server group, the group has a
+        different policy, or the group has no ``network_group`` rule
+    """
+    instance_group = spec_obj.instance_group
+    if not instance_group or instance_group.policy != policy:
+        return None
+
+    rules = instance_group.rules
+    network_group = rules.get('network_group') if rules else None
+    if not network_group:
+        return None
+    return _network_group_to_trait(network_group)
+
+
+def _host_traits(host_state):
+    """Return the traits of a host, or an empty set if it has none.
+
+    :param host_state: The host state being evaluated
+    :returns: ``host_state.traits``, or an empty set when the attribute is
+        missing, None or empty
+    """
+    # NOTE(review): this assumes the host state carries a ``traits``
+    # collection; confirm the host manager populates it, otherwise every
+    # host is treated as having no traits.
+    return getattr(host_state, 'traits', None) or set()
+
+
+class NetworkGroupAffinityFilter(filters.BaseHostFilter):
+    """Schedule instances onto hosts within a specific network group.
+
+    When a server group has the ``network-group-affinity`` policy and a
+    ``network_group`` rule, this filter only passes hosts whose reported
+    traits include the matching ``CUSTOM_NETGROUP_*`` trait.
+
+    Hosts without the required trait are rejected.
+    """
+
+    # The trait set of a host does not change within a single scheduling
+    # request.
+    run_filter_once_per_request = True
+
+    RUN_ON_REBUILD = False
+
+    # Server group policy this filter acts on.
+    _POLICY = 'network-group-affinity'
+
+    def host_passes(self, host_state, spec_obj):
+        """Pass only hosts that report the requested network group trait.
+
+        Requests without a ``network-group-affinity`` server group, or
+        without a ``network_group`` rule, are not restricted.
+        """
+        required_trait = _requested_trait(spec_obj, self._POLICY)
+        if required_trait is None:
+            return True
+
+        passes = required_trait in _host_traits(host_state)
+        if not passes:
+            LOG.debug(
+                "NetworkGroupAffinityFilter: host %(host)s rejected. "
+                "Required trait %(trait)s not found in host traits.",
+                {'host': host_state.host, 'trait': required_trait})
+        return passes
+
+
+class NetworkGroupAntiAffinityFilter(filters.BaseHostFilter):
+    """Schedule instances onto hosts NOT within a specific network group.
+
+    When a server group has the ``network-group-anti-affinity`` policy and
+    a ``network_group`` rule, this filter rejects hosts whose reported
+    traits include the matching ``CUSTOM_NETGROUP_*`` trait.
+
+    This is useful for spreading workloads across cabinets or ensuring
+    instances avoid a particular switch pair.
+    """
+
+    # The trait set of a host does not change within a single scheduling
+    # request.
+    run_filter_once_per_request = True
+
+    RUN_ON_REBUILD = False
+
+    # Server group policy this filter acts on.
+    _POLICY = 'network-group-anti-affinity'
+
+    def host_passes(self, host_state, spec_obj):
+        """Reject hosts that report the excluded network group trait.
+
+        Requests without a ``network-group-anti-affinity`` server group, or
+        without a ``network_group`` rule, are not restricted.
+        """
+        excluded_trait = _requested_trait(spec_obj, self._POLICY)
+        if excluded_trait is None:
+            return True
+
+        passes = excluded_trait not in _host_traits(host_state)
+        if not passes:
+            LOG.debug(
+                "NetworkGroupAntiAffinityFilter: host %(host)s rejected. "
+                "Excluded trait %(trait)s found in host traits.",
+                {'host': host_state.host, 'trait': excluded_trait})
+        return passes
diff --git a/nova/scheduler/utils.py b/nova/scheduler/utils.py
index e8d832574e1..f7c3d3779d9 100644
--- a/nova/scheduler/utils.py
+++ b/nova/scheduler/utils.py
@@ -1210,7 +1210,8 @@ def _get_group_details(context, instance_uuid, user_group_hosts=None):
         return
 
     policies = set(('anti-affinity', 'affinity', 'soft-affinity',
-                    'soft-anti-affinity'))
+                    'soft-anti-affinity', 'network-group-affinity',
+                    'network-group-anti-affinity'))
     if group.policy in policies:
         if not _SUPPORTS_AFFINITY and 'affinity' == group.policy:
             msg = _("ServerGroupAffinityFilter not configured")
diff --git a/nova/tests/unit/scheduler/filters/test_network_group_filter.py b/nova/tests/unit/scheduler/filters/test_network_group_filter.py
new file mode 100644
index 00000000000..b25bc3b929f
--- /dev/null
+++ b/nova/tests/unit/scheduler/filters/test_network_group_filter.py
@@ -0,0 +1,184 @@
+# Copyright 2025 Rackspace Technology, Inc.
+# All Rights Reserved.
+#
+#    Licensed under the Apache License, Version 2.0 (the "License"); you may
+#    not use this file except in compliance with the License. You may obtain
+#    a copy of the License at
+#
+#         http://www.apache.org/licenses/LICENSE-2.0
+#
+#    Unless required by applicable law or agreed to in writing, software
+#    distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+#    WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+#    License for the specific language governing permissions and limitations
+#    under the License.
+
+from unittest import mock
+
+from nova import objects
+from nova.scheduler.filters import network_group_filter
+from nova import test
+from nova.tests.unit.scheduler import fakes
+
+
+class _FakesMixin(object):
+    """Build the fake host states and request specs the tests use."""
+
+    def _make_host_state(self, host, traits=None):
+        host_state = fakes.FakeHostState(host, 'node', {})
+        host_state.traits = traits or set()
+        return host_state
+
+    def _make_spec_obj(self, policy=None, rules=None):
+        spec_obj = mock.Mock()
+        if policy:
+            instance_group = objects.InstanceGroup(
+                policy=policy,
+                _rules=rules or {},
+            )
+            spec_obj.instance_group = instance_group
+        else:
+            spec_obj.instance_group = None
+        return spec_obj
+
+
+class TestNetworkGroupAffinityFilter(_FakesMixin, test.NoDBTestCase):
+
+    def setUp(self):
+        super().setUp()
+        self.filt_cls = network_group_filter.NetworkGroupAffinityFilter()
+
+    def test_passes_no_instance_group(self):
+        host_state = self._make_host_state('host1')
+        spec_obj = self._make_spec_obj()
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_different_policy(self):
+        host_state = self._make_host_state('host1')
+        spec_obj = self._make_spec_obj(policy='anti-affinity')
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_no_network_group_rule(self):
+        host_state = self._make_host_state('host1')
+        spec_obj = self._make_spec_obj(
+            policy='network-group-affinity', rules={})
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_host_has_matching_trait(self):
+        host_state = self._make_host_state(
+            'host1',
+            traits={'CUSTOM_NETGROUP_A1_1_NETWORK', 'CUSTOM_OTHER'})
+        spec_obj = self._make_spec_obj(
+            policy='network-group-affinity',
+            rules={'network_group': 'a1-1-network'})
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_fails_host_missing_trait(self):
+        host_state = self._make_host_state(
+            'host1',
+            traits={'CUSTOM_NETGROUP_B2_3_NETWORK', 'CUSTOM_OTHER'})
+        spec_obj = self._make_spec_obj(
+            policy='network-group-affinity',
+            rules={'network_group': 'a1-1-network'})
+        self.assertFalse(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_fails_host_no_traits(self):
+        host_state = self._make_host_state('host1', traits=set())
+        spec_obj = self._make_spec_obj(
+            policy='network-group-affinity',
+            rules={'network_group': 'a1-1-network'})
+        self.assertFalse(self.filt_cls.host_passes(host_state, spec_obj))
+        # A host whose traits are None is treated like one with no traits.
+        host_state.traits = None
+        self.assertFalse(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_cross_rack_network_group(self):
+        """Test VLAN groups that span paired racks (slash in name)."""
+        host_state = self._make_host_state(
+            'host1',
+            traits={'CUSTOM_NETGROUP_A11_12_A11_13_NETWORK'})
+        spec_obj = self._make_spec_obj(
+            policy='network-group-affinity',
+            rules={'network_group': 'a11-12/a11-13-network'})
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_trait_conversion(self):
+        """Verify the name-to-trait conversion logic."""
+        self.assertEqual(
+            'CUSTOM_NETGROUP_A1_1_NETWORK',
+            network_group_filter._network_group_to_trait('a1-1-network'))
+        self.assertEqual(
+            'CUSTOM_NETGROUP_A11_12_A11_13_NETWORK',
+            network_group_filter._network_group_to_trait(
+                'a11-12/a11-13-network'))
+        self.assertEqual(
+            'CUSTOM_NETGROUP_F20_1_NETWORK',
+            network_group_filter._network_group_to_trait('f20-1-network'))
+        # Characters that are not valid in a trait become underscores.
+        self.assertEqual(
+            'CUSTOM_NETGROUP_A1_1_NET_WORK_X',
+            network_group_filter._network_group_to_trait('a1.1 net:work_x'))
+
+
+class TestNetworkGroupAntiAffinityFilter(_FakesMixin, test.NoDBTestCase):
+
+    def setUp(self):
+        super().setUp()
+        self.filt_cls = network_group_filter.NetworkGroupAntiAffinityFilter()
+
+    def test_passes_no_instance_group(self):
+        host_state = self._make_host_state('host1')
+        spec_obj = self._make_spec_obj()
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_different_policy(self):
+        host_state = self._make_host_state('host1')
+        spec_obj = self._make_spec_obj(policy='affinity')
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_no_network_group_rule(self):
+        host_state = self._make_host_state('host1')
+        spec_obj = self._make_spec_obj(
+            policy='network-group-anti-affinity', rules={})
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_host_does_not_have_excluded_trait(self):
+        host_state = self._make_host_state(
+            'host1',
+            traits={'CUSTOM_NETGROUP_B2_3_NETWORK', 'CUSTOM_OTHER'})
+        spec_obj = self._make_spec_obj(
+            policy='network-group-anti-affinity',
+            rules={'network_group': 'a1-1-network'})
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_fails_host_has_excluded_trait(self):
+        host_state = self._make_host_state(
+            'host1',
+            traits={'CUSTOM_NETGROUP_A1_1_NETWORK', 'CUSTOM_OTHER'})
+        spec_obj = self._make_spec_obj(
+            policy='network-group-anti-affinity',
+            rules={'network_group': 'a1-1-network'})
+        self.assertFalse(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_passes_host_no_traits(self):
+        """Hosts with no traits pass anti-affinity (they're not in any
+        network group).
+        """
+        host_state = self._make_host_state('host1', traits=set())
+        spec_obj = self._make_spec_obj(
+            policy='network-group-anti-affinity',
+            rules={'network_group': 'a1-1-network'})
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+        # A host whose traits are None is treated like one with no traits.
+        host_state.traits = None
+        self.assertTrue(self.filt_cls.host_passes(host_state, spec_obj))
+
+    def test_fails_cross_rack_network_group(self):
+        """Test anti-affinity with cross-rack VLAN group name."""
+        host_state = self._make_host_state(
+            'host1',
+            traits={'CUSTOM_NETGROUP_A11_12_A11_13_NETWORK'})
+        spec_obj = self._make_spec_obj(
+            policy='network-group-anti-affinity',
+            rules={'network_group': 'a11-12/a11-13-network'})
+        self.assertFalse(self.filt_cls.host_passes(host_state, spec_obj))