From bf46c66bef823f809cf7a6adc1fd711aa42535ee Mon Sep 17 00:00:00 2001 From: vr4manta Date: Mon, 15 Jun 2026 09:23:43 -0400 Subject: [PATCH] Added new VAP for vSphere infra validation against current cpms and machines --- .../0000_30_machine-api-operator_09_rbac.yaml | 16 + pkg/operator/operator.go | 4 +- pkg/operator/operator_test.go | 35 +- pkg/operator/sync.go | 52 +++ pkg/operator/sync_test.go | 93 +++++ pkg/webhooks/vap.go | 357 ++++++++++++++++++ pkg/webhooks/vap_test.go | 260 +++++++++++++ 7 files changed, 804 insertions(+), 13 deletions(-) create mode 100644 pkg/webhooks/vap.go create mode 100644 pkg/webhooks/vap_test.go diff --git a/install/0000_30_machine-api-operator_09_rbac.yaml b/install/0000_30_machine-api-operator_09_rbac.yaml index e70deb082..4e44ad6b2 100644 --- a/install/0000_30_machine-api-operator_09_rbac.yaml +++ b/install/0000_30_machine-api-operator_09_rbac.yaml @@ -472,6 +472,8 @@ rules: resources: - validatingwebhookconfigurations - mutatingwebhookconfigurations + - validatingadmissionpolicies + - validatingadmissionpolicybindings verbs: - get - list @@ -479,6 +481,20 @@ rules: - create - update + # The machine-api-operator service account must be able to read Machines, + # ControlPlaneMachineSets, and MachineSets so the kube-apiserver can resolve + # them as VAP params when validating Infrastructure/cluster updates. + - apiGroups: + - machine.openshift.io + resources: + - machines + - controlplanemachinesets + - machinesets + verbs: + - get + - list + - watch + --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/pkg/operator/operator.go b/pkg/operator/operator.go index f5886a08d..52781ffd7 100644 --- a/pkg/operator/operator.go +++ b/pkg/operator/operator.go @@ -80,6 +80,7 @@ type Operator struct { mutatingWebhookListerSynced cache.InformerSynced featureGateAccessor featuregates.FeatureGateAccess + featureGates featuregates.FeatureGate // queue only ever has one item, but it has nice error handling backoff/retry semantics queue workqueue.TypedRateLimitingInterface[string] @@ -167,6 +168,7 @@ func New( recorder, ) featureGateAccessor.SetChangeHandler(func(featureChange featuregates.FeatureChange) { + optr.featureGates = featuregates.NewFeatureGate(featureChange.New.Enabled, featureChange.New.Disabled) if featureChange.Previous == nil { // When the initial featuregate is set, the previous version is nil. // Nothing to do in this case, it's handled by the 1st sync, which only runs after the initial feature set was received. @@ -176,7 +178,7 @@ func New( klog.V(4).InfoS("FeatureGates changed", "enabled", featureChange.New.Enabled, "disabled", featureChange.New.Disabled) prevDisableMHC := featuregates.NewFeatureGate(featureChange.Previous.Enabled, featureChange.Previous.Disabled). Enabled(apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController) - newDisableMHC := featuregates.NewFeatureGate(featureChange.New.Enabled, featureChange.New.Disabled). + newDisableMHC := optr.featureGates. Enabled(apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController) if prevDisableMHC != newDisableMHC { diff --git a/pkg/operator/operator_test.go b/pkg/operator/operator_test.go index 71d74e350..ae05d88ac 100644 --- a/pkg/operator/operator_test.go +++ b/pkg/operator/operator_test.go @@ -48,6 +48,11 @@ var ( {Name: apifeatures.FeatureGateAWSDedicatedHosts}, } + disabledFeatureGates = []openshiftv1.FeatureGateAttributes{ + {Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}, + {Name: apifeatures.FeatureGateVSphereMultiVCenterDay2}, + } + enabledFeatureMap = map[string]bool{ "MachineAPIMigration": true, "AzureWorkloadIdentity": true, @@ -80,7 +85,7 @@ func newFakeOperator(kubeObjects, osObjects, machineObjects []runtime.Object, im { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -91,6 +96,11 @@ func newFakeOperator(kubeObjects, osObjects, machineObjects []runtime.Object, im return nil, fmt.Errorf("error adding event handler to deployments informer: %v", err) } + featureGates, err := featureGateAccessor.CurrentFeatureGates() + if err != nil { + return nil, fmt.Errorf("error getting current feature gates: %v", err) + } + optr := &Operator{ kubeClient: kubeClient, osClient: osClient, @@ -102,6 +112,7 @@ func newFakeOperator(kubeObjects, osObjects, machineObjects []runtime.Object, im mutatingWebhookLister: mutatingWebhookInformer.Lister(), validatingWebhookLister: validatingWebhookInformer.Lister(), featureGateAccessor: featureGateAccessor, + featureGates: featureGates, imagesFile: imagesFile, namespace: targetNamespace, eventRecorder: record.NewFakeRecorder(50), @@ -387,7 +398,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -423,7 +434,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -459,7 +470,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -495,7 +506,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -531,7 +542,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -567,7 +578,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -603,7 +614,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -639,7 +650,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -677,7 +688,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, @@ -713,7 +724,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: append(enabledFeatureGates, openshiftv1.FeatureGateAttributes{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}), - Disabled: []openshiftv1.FeatureGateAttributes{}, + Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateVSphereMultiVCenterDay2}}, }, }, }, @@ -749,7 +760,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) { { Version: "", Enabled: enabledFeatureGates, - Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + Disabled: disabledFeatureGates, }, }, }, diff --git a/pkg/operator/sync.go b/pkg/operator/sync.go index 986f86d91..631468f8d 100644 --- a/pkg/operator/sync.go +++ b/pkg/operator/sync.go @@ -23,6 +23,7 @@ import ( "sigs.k8s.io/controller-runtime/pkg/reconcile" configv1 "github.com/openshift/api/config/v1" + apifeatures "github.com/openshift/api/features" machinev1beta1 "github.com/openshift/api/machine/v1beta1" utiltls "github.com/openshift/controller-runtime-common/pkg/tls" libgocrypto "github.com/openshift/library-go/pkg/crypto" @@ -268,6 +269,57 @@ func (optr *Operator) syncWebhookConfiguration(config *OperatorConfig) error { return err } } + if config.PlatformType == configv1.VSpherePlatformType { + if err := optr.syncVSphereFailureDomainVAPs(); err != nil { + return err + } + } + return nil +} + +// syncVSphereFailureDomainVAPs ensures that the ValidatingAdmissionPolicies and their +// bindings for protecting vSphere failure domains are present and up to date. +// These policies prevent an administrator from removing a failure domain from the +// Infrastructure/cluster CR while it is still referenced by Machines or +// ControlPlaneMachineSets managed by the Machine API Operator. +func (optr *Operator) syncVSphereFailureDomainVAPs() error { + if !optr.featureGates.Enabled(apifeatures.FeatureGateVSphereMultiVCenterDay2) { + return nil + } + + recorder := events.NewLoggingEventRecorder(optr.name, clock.RealClock{}) + + if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyV1(context.TODO(), + optr.kubeClient.AdmissionregistrationV1(), recorder, + mapiwebhooks.NewVSphereFailureDomainMachineVAP(), optr.cache); err != nil { + return fmt.Errorf("failed to apply vSphere failure domain Machine ValidatingAdmissionPolicy: %w", err) + } + if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyBindingV1(context.TODO(), + optr.kubeClient.AdmissionregistrationV1(), recorder, + mapiwebhooks.NewVSphereFailureDomainMachineVAPBinding(), optr.cache); err != nil { + return fmt.Errorf("failed to apply vSphere failure domain Machine ValidatingAdmissionPolicyBinding: %w", err) + } + if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyV1(context.TODO(), + optr.kubeClient.AdmissionregistrationV1(), recorder, + mapiwebhooks.NewVSphereFailureDomainCPMSVAP(), optr.cache); err != nil { + return fmt.Errorf("failed to apply vSphere failure domain ControlPlaneMachineSet ValidatingAdmissionPolicy: %w", err) + } + if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyBindingV1(context.TODO(), + optr.kubeClient.AdmissionregistrationV1(), recorder, + mapiwebhooks.NewVSphereFailureDomainCPMSVAPBinding(), optr.cache); err != nil { + return fmt.Errorf("failed to apply vSphere failure domain ControlPlaneMachineSet ValidatingAdmissionPolicyBinding: %w", err) + } + if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyV1(context.TODO(), + optr.kubeClient.AdmissionregistrationV1(), recorder, + mapiwebhooks.NewVSphereFailureDomainMachineSetVAP(), optr.cache); err != nil { + return fmt.Errorf("failed to apply vSphere failure domain MachineSet ValidatingAdmissionPolicy: %w", err) + } + if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyBindingV1(context.TODO(), + optr.kubeClient.AdmissionregistrationV1(), recorder, + mapiwebhooks.NewVSphereFailureDomainMachineSetVAPBinding(), optr.cache); err != nil { + return fmt.Errorf("failed to apply vSphere failure domain MachineSet ValidatingAdmissionPolicyBinding: %w", err) + } + return nil } diff --git a/pkg/operator/sync_test.go b/pkg/operator/sync_test.go index cabc8b424..3066eeb22 100644 --- a/pkg/operator/sync_test.go +++ b/pkg/operator/sync_test.go @@ -9,6 +9,7 @@ import ( . "github.com/onsi/gomega" configv1 "github.com/openshift/api/config/v1" + apifeatures "github.com/openshift/api/features" machinev1beta1 "github.com/openshift/api/machine/v1beta1" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" @@ -506,6 +507,98 @@ func TestSyncWebhookConfiguration(t *testing.T) { } } +func TestSyncVSphereFailureDomainVAPs(t *testing.T) { + vsphereMultiVCenterDay2Enabled := &configv1.FeatureGate{ + ObjectMeta: metav1.ObjectMeta{Name: "cluster"}, + Status: configv1.FeatureGateStatus{ + FeatureGates: []configv1.FeatureGateDetails{ + { + Enabled: append(enabledFeatureGates, configv1.FeatureGateAttributes{Name: apifeatures.FeatureGateVSphereMultiVCenterDay2}), + Disabled: []configv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}}, + }, + }, + }, + } + + testCases := []struct { + name string + platformType configv1.PlatformType + featureGate *configv1.FeatureGate + expectedNrValidatingAdmPolicies int + expectedNrValidatingAdmPBindings int + expectedPolicyNames []string + }{ + { + name: "no VAPs on non-vSphere platform", + platformType: configv1.AWSPlatformType, + expectedNrValidatingAdmPolicies: 0, + expectedNrValidatingAdmPBindings: 0, + }, + { + name: "no VAPs on vSphere platform when VSphereMultiVCenterDay2 is disabled", + platformType: configv1.VSpherePlatformType, + expectedNrValidatingAdmPolicies: 0, + expectedNrValidatingAdmPBindings: 0, + }, + { + name: "three VAPs and three bindings on vSphere platform when VSphereMultiVCenterDay2 is enabled", + platformType: configv1.VSpherePlatformType, + featureGate: vsphereMultiVCenterDay2Enabled, + expectedNrValidatingAdmPolicies: 3, + expectedNrValidatingAdmPBindings: 3, + expectedPolicyNames: []string{ + "vsphere-failure-domain-in-use-by-machine", + "vsphere-failure-domain-in-use-by-cpms", + "vsphere-failure-domain-in-use-by-machineset", + }, + }, + } + + for _, tc := range testCases { + t.Run(tc.name, func(t *testing.T) { + g := NewWithT(t) + + stopCh := make(chan struct{}) + defer close(stopCh) + optr, err := newFakeOperator(nil, nil, nil, "", tc.featureGate, stopCh) + if err != nil { + t.Fatal(err) + } + + err = optr.syncWebhookConfiguration(&OperatorConfig{PlatformType: tc.platformType}) + g.Expect(err).ToNot(HaveOccurred()) + + policies, err := optr.kubeClient.AdmissionregistrationV1(). + ValidatingAdmissionPolicies().List(t.Context(), metav1.ListOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(policies.Items).To(HaveLen(tc.expectedNrValidatingAdmPolicies), + "unexpected number of ValidatingAdmissionPolicies") + + bindings, err := optr.kubeClient.AdmissionregistrationV1(). + ValidatingAdmissionPolicyBindings().List(t.Context(), metav1.ListOptions{}) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(bindings.Items).To(HaveLen(tc.expectedNrValidatingAdmPBindings), + "unexpected number of ValidatingAdmissionPolicyBindings") + + if len(tc.expectedPolicyNames) > 0 { + policyNames := make([]string, 0, len(policies.Items)) + for _, p := range policies.Items { + policyNames = append(policyNames, p.Name) + } + g.Expect(policyNames).To(ConsistOf(tc.expectedPolicyNames), + "unexpected ValidatingAdmissionPolicy names") + + bindingPolicyRefs := make([]string, 0, len(bindings.Items)) + for _, b := range bindings.Items { + bindingPolicyRefs = append(bindingPolicyRefs, b.Spec.PolicyName) + } + g.Expect(bindingPolicyRefs).To(ConsistOf(tc.expectedPolicyNames), + "binding PolicyNames must reference the created policies") + } + }) + } +} + func TestCheckDaemonSetRolloutStatus(t *testing.T) { testCases := []struct { name string diff --git a/pkg/webhooks/vap.go b/pkg/webhooks/vap.go new file mode 100644 index 000000000..e059347ee --- /dev/null +++ b/pkg/webhooks/vap.go @@ -0,0 +1,357 @@ +package webhooks + +import ( + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" +) + +const ( + // VAPMachineFailureDomainName is the name of the ValidatingAdmissionPolicy that guards + // against removing a vSphere failure domain that is still referenced by a Machine. + VAPMachineFailureDomainName = "vsphere-failure-domain-in-use-by-machine" + + // VAPCPMSFailureDomainName is the name of the ValidatingAdmissionPolicy that guards + // against removing a vSphere failure domain that is still referenced by a ControlPlaneMachineSet. + VAPCPMSFailureDomainName = "vsphere-failure-domain-in-use-by-cpms" + + // VAPMachineSetFailureDomainName is the name of the ValidatingAdmissionPolicy that guards + // against removing a vSphere failure domain that is still referenced by a MachineSet (including + // MachineSets with zero replicas that would have no running Machines to catch the check). + VAPMachineSetFailureDomainName = "vsphere-failure-domain-in-use-by-machineset" + + // machineRegionLabel is the label used to identify the region a Machine belongs to. + machineRegionLabel = "machine.openshift.io/region" + + // machineZoneLabel is the label used to identify the availability zone a Machine belongs to. + machineZoneLabel = "machine.openshift.io/zone" + + // vspherePlatformType is the platform type string as used in infrastructure.spec.platformSpec.type. + vspherePlatformType = "VSphere" + + // openMachineAPINamespace is the namespace where Machines and CPMS live. + openMachineAPINamespace = "openshift-machine-api" +) + +var ( + // vapDenyAction is the enforcement action that denies the admission request. + vapDenyAction = admissionregistrationv1.Deny + + // vapParamNotFoundAllow means: if no param object exists (e.g. no Machines yet), allow the infra update. + vapParamNotFoundAllow = admissionregistrationv1.AllowAction +) + +// NewVSphereFailureDomainMachineVAP returns a ValidatingAdmissionPolicy that prevents +// an infrastructure/cluster UPDATE from removing a vSphere failure domain that is still +// referenced by at least one Machine (identified via machine.openshift.io/region and +// machine.openshift.io/zone labels). +// +// The policy fires on every UPDATE of infrastructures.config.openshift.io. It is evaluated +// once per Machine that exists in the openshift-machine-api namespace (param binding). +// If any Machine's region+zone labels match a failure domain being removed, admission is denied. +func NewVSphereFailureDomainMachineVAP() *admissionregistrationv1.ValidatingAdmissionPolicy { + failurePolicy := admissionregistrationv1.Fail + + return &admissionregistrationv1.ValidatingAdmissionPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: VAPMachineFailureDomainName, + }, + Spec: admissionregistrationv1.ValidatingAdmissionPolicySpec{ + // The param object is a Machine (one evaluation per Machine). + ParamKind: &admissionregistrationv1.ParamKind{ + APIVersion: "machine.openshift.io/v1beta1", + Kind: "Machine", + }, + // Fire on UPDATE of the Infrastructure CR only. + MatchConstraints: &admissionregistrationv1.MatchResources{ + ResourceRules: []admissionregistrationv1.NamedRuleWithOperations{ + { + RuleWithOperations: admissionregistrationv1.RuleWithOperations{ + Rule: admissionregistrationv1.Rule{ + APIGroups: []string{"config.openshift.io"}, + APIVersions: []string{"v1"}, + Resources: []string{"infrastructures"}, + }, + Operations: []admissionregistrationv1.OperationType{ + admissionregistrationv1.Update, + }, + }, + }, + }, + }, + // Only evaluate when the cluster is running on vSphere. + MatchConditions: []admissionregistrationv1.MatchCondition{ + { + Name: "is-vsphere-platform", + Expression: `object.?spec.platformSpec.type.orValue("") == "` + vspherePlatformType + `"`, + }, + }, + // Reusable sub-expressions. + Variables: []admissionregistrationv1.Variable{ + { + // fds: the failure domains list from the incoming (updated) Infrastructure spec. + Name: "fds", + Expression: `object.?spec.platformSpec.vsphere.failureDomains.orValue([])`, + }, + { + // machineRegion: the region label of the Machine param (empty string if absent). + Name: "machineRegion", + Expression: `params.?metadata.labels["` + machineRegionLabel + `"].orValue("")`, + }, + { + // machineZone: the zone label of the Machine param (empty string if absent). + Name: "machineZone", + Expression: `params.?metadata.labels["` + machineZoneLabel + `"].orValue("")`, + }, + }, + // Core validation: the Machine's region+zone must still exist in the updated infra spec. + Validations: []admissionregistrationv1.Validation{ + { + // Pass when: + // - Machine has no region/zone label (not a failure-domain-managed Machine), OR + // - The failure domain is still present in the incoming spec. + Expression: `variables.machineRegion == "" || variables.machineZone == "" || +variables.fds.exists(fd, + fd.region == variables.machineRegion && fd.zone == variables.machineZone +)`, + MessageExpression: `"Infrastructure update would remove vSphere failure domain (region=" + variables.machineRegion + ", zone=" + variables.machineZone + ") that is still in use by Machine '" + params.metadata.name + "'"`, + Reason: ptr.To(metav1.StatusReasonInvalid), + }, + }, + // Hard fail if the policy itself errors (e.g. param parse failure). + FailurePolicy: &failurePolicy, + }, + } +} + +// NewVSphereFailureDomainMachineVAPBinding returns the ValidatingAdmissionPolicyBinding that +// connects the Machine VAP to all Machines in the openshift-machine-api namespace. +// The policy is evaluated once per Machine (param). +func NewVSphereFailureDomainMachineVAPBinding() *admissionregistrationv1.ValidatingAdmissionPolicyBinding { + return &admissionregistrationv1.ValidatingAdmissionPolicyBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: VAPMachineFailureDomainName, + }, + Spec: admissionregistrationv1.ValidatingAdmissionPolicyBindingSpec{ + // Reference the policy defined above. + PolicyName: VAPMachineFailureDomainName, + // Param: iterate over all Machines in openshift-machine-api. + ParamRef: &admissionregistrationv1.ParamRef{ + // Empty selector matches all Machines. + Selector: &metav1.LabelSelector{}, + Namespace: openMachineAPINamespace, + ParameterNotFoundAction: &vapParamNotFoundAllow, + }, + // Deny if any validation fails. + ValidationActions: []admissionregistrationv1.ValidationAction{ + vapDenyAction, + }, + }, + } +} + +// NewVSphereFailureDomainCPMSVAP returns a ValidatingAdmissionPolicy that prevents +// an infrastructure/cluster UPDATE from removing a vSphere failure domain that is still +// referenced by a ControlPlaneMachineSet (CPMS) by failure domain name. +// +// The CPMS references failure domains by the Name field of VSpherePlatformFailureDomainSpec. +// The policy fires on every UPDATE of infrastructures.config.openshift.io and is evaluated +// once per ControlPlaneMachineSet in the openshift-machine-api namespace. +func NewVSphereFailureDomainCPMSVAP() *admissionregistrationv1.ValidatingAdmissionPolicy { + failurePolicy := admissionregistrationv1.Fail + + return &admissionregistrationv1.ValidatingAdmissionPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: VAPCPMSFailureDomainName, + }, + Spec: admissionregistrationv1.ValidatingAdmissionPolicySpec{ + // The param object is a ControlPlaneMachineSet. + ParamKind: &admissionregistrationv1.ParamKind{ + APIVersion: "machine.openshift.io/v1", + Kind: "ControlPlaneMachineSet", + }, + // Fire on UPDATE of the Infrastructure CR only. + MatchConstraints: &admissionregistrationv1.MatchResources{ + ResourceRules: []admissionregistrationv1.NamedRuleWithOperations{ + { + RuleWithOperations: admissionregistrationv1.RuleWithOperations{ + Rule: admissionregistrationv1.Rule{ + APIGroups: []string{"config.openshift.io"}, + APIVersions: []string{"v1"}, + Resources: []string{"infrastructures"}, + }, + Operations: []admissionregistrationv1.OperationType{ + admissionregistrationv1.Update, + }, + }, + }, + }, + }, + // Only evaluate when the cluster is running on vSphere. + MatchConditions: []admissionregistrationv1.MatchCondition{ + { + Name: "is-vsphere-platform", + Expression: `object.?spec.platformSpec.type.orValue("") == "` + vspherePlatformType + `"`, + }, + }, + // Reusable sub-expressions. + Variables: []admissionregistrationv1.Variable{ + { + // fds: failure domains from the incoming (updated) Infrastructure spec. + Name: "fds", + Expression: `object.?spec.platformSpec.vsphere.failureDomains.orValue([])`, + }, + { + // cpmsFDs: the list of vSphere failure domain names referenced by the CPMS param. + // The CPMS template field path is: + // spec.template.machines_v1beta1_machine_openshift_io.failureDomains.vsphere[*].name + Name: "cpmsFDs", + Expression: `(has(params.spec.template.machines_v1beta1_machine_openshift_io) && + has(params.spec.template.machines_v1beta1_machine_openshift_io.failureDomains) && + has(params.spec.template.machines_v1beta1_machine_openshift_io.failureDomains.vsphere)) + ? params.spec.template.machines_v1beta1_machine_openshift_io.failureDomains.vsphere + : []`, + }, + }, + // Core validation: every CPMS failure domain name must still exist in the updated infra spec. + Validations: []admissionregistrationv1.Validation{ + { + // Pass when: + // - CPMS has no vSphere failure domains configured (empty list), OR + // - Every CPMS failure domain name is still present in the incoming infra spec. + Expression: `variables.cpmsFDs.size() == 0 || +variables.cpmsFDs.all(cpmsfd, + variables.fds.exists(infrafd, infrafd.name == cpmsfd.name) +)`, + MessageExpression: `"Infrastructure update would remove vSphere failure domain(s) still referenced by ControlPlaneMachineSet '" + params.metadata.name + "': [" + variables.cpmsFDs.filter(cpmsfd, !variables.fds.exists(infrafd, infrafd.name == cpmsfd.name)).map(cpmsfd, cpmsfd.name).join(", ") + "]"`, + Reason: ptr.To(metav1.StatusReasonInvalid), + }, + }, + // Hard fail if the policy itself errors. + FailurePolicy: &failurePolicy, + }, + } +} + +// NewVSphereFailureDomainCPMSVAPBinding returns the ValidatingAdmissionPolicyBinding that +// connects the CPMS VAP to all ControlPlaneMachineSets in the openshift-machine-api namespace. +func NewVSphereFailureDomainCPMSVAPBinding() *admissionregistrationv1.ValidatingAdmissionPolicyBinding { + return &admissionregistrationv1.ValidatingAdmissionPolicyBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: VAPCPMSFailureDomainName, + }, + Spec: admissionregistrationv1.ValidatingAdmissionPolicyBindingSpec{ + // Reference the CPMS policy. + PolicyName: VAPCPMSFailureDomainName, + // Param: iterate over all ControlPlaneMachineSets in openshift-machine-api. + ParamRef: &admissionregistrationv1.ParamRef{ + Selector: &metav1.LabelSelector{}, + Namespace: openMachineAPINamespace, + ParameterNotFoundAction: &vapParamNotFoundAllow, + }, + // Deny if any validation fails. + ValidationActions: []admissionregistrationv1.ValidationAction{ + vapDenyAction, + }, + }, + } +} + +// NewVSphereFailureDomainMachineSetVAP returns a ValidatingAdmissionPolicy that prevents an +// Infrastructure update from removing a vSphere failure domain that is still referenced by at +// least one MachineSet (identified via machine.openshift.io/region and +// machine.openshift.io/zone labels on the MachineSet template). This covers MachineSets with +// zero replicas, which would otherwise have no running Machines for the Machine VAP to check. +func NewVSphereFailureDomainMachineSetVAP() *admissionregistrationv1.ValidatingAdmissionPolicy { + failurePolicy := admissionregistrationv1.Fail + return &admissionregistrationv1.ValidatingAdmissionPolicy{ + ObjectMeta: metav1.ObjectMeta{ + Name: VAPMachineSetFailureDomainName, + }, + Spec: admissionregistrationv1.ValidatingAdmissionPolicySpec{ + // Param: each evaluation receives one MachineSet as a param object. + ParamKind: &admissionregistrationv1.ParamKind{ + APIVersion: "machine.openshift.io/v1beta1", + Kind: "MachineSet", + }, + // Trigger: UPDATE of the Infrastructure CR only. + MatchConstraints: &admissionregistrationv1.MatchResources{ + ResourceRules: []admissionregistrationv1.NamedRuleWithOperations{ + { + RuleWithOperations: admissionregistrationv1.RuleWithOperations{ + Rule: admissionregistrationv1.Rule{ + APIGroups: []string{"config.openshift.io"}, + APIVersions: []string{"v1"}, + Resources: []string{"infrastructures"}, + }, + Operations: []admissionregistrationv1.OperationType{ + admissionregistrationv1.Update, + }, + }, + }, + }, + }, + // Only evaluate when the cluster is running on vSphere. + MatchConditions: []admissionregistrationv1.MatchCondition{ + { + Name: "is-vsphere-platform", + Expression: `object.?spec.platformSpec.type.orValue("") == "` + vspherePlatformType + `"`, + }, + }, + // Reusable sub-expressions. + Variables: []admissionregistrationv1.Variable{ + { + // fds: the failure domains list from the incoming (updated) Infrastructure spec. + Name: "fds", + Expression: `object.?spec.platformSpec.vsphere.failureDomains.orValue([])`, + }, + { + // msRegion: the region label of the MachineSet template (empty string if absent). + Name: "msRegion", + Expression: `params.?spec.template.metadata.labels["` + machineRegionLabel + `"].orValue("")`, + }, + { + // msZone: the zone label of the MachineSet template (empty string if absent). + Name: "msZone", + Expression: `params.?spec.template.metadata.labels["` + machineZoneLabel + `"].orValue("")`, + }, + }, + // Core validation: the MachineSet template's region+zone must still exist in the updated infra spec. + Validations: []admissionregistrationv1.Validation{ + { + // MachineSet has no region/zone label in its template (not a failure-domain-managed + // MachineSet), OR the failure domain is still present in the incoming spec. + Expression: `variables.msRegion == "" || variables.msZone == "" || variables.fds.exists(fd, fd.region == variables.msRegion && fd.zone == variables.msZone)`, + MessageExpression: `"Infrastructure update would remove vSphere failure domain (region=" + variables.msRegion + ", zone=" + variables.msZone + ") that is still in use by MachineSet '" + params.metadata.name + "'"`, + Reason: ptr.To(metav1.StatusReasonInvalid), + }, + }, + // Hard fail if the VAP itself cannot be evaluated. + FailurePolicy: &failurePolicy, + }, + } +} + +// NewVSphereFailureDomainMachineSetVAPBinding returns a ValidatingAdmissionPolicyBinding that +// connects the MachineSet VAP to all MachineSets in the openshift-machine-api namespace. +func NewVSphereFailureDomainMachineSetVAPBinding() *admissionregistrationv1.ValidatingAdmissionPolicyBinding { + return &admissionregistrationv1.ValidatingAdmissionPolicyBinding{ + ObjectMeta: metav1.ObjectMeta{ + Name: VAPMachineSetFailureDomainName, + }, + Spec: admissionregistrationv1.ValidatingAdmissionPolicyBindingSpec{ + // Reference the MachineSet policy. + PolicyName: VAPMachineSetFailureDomainName, + // Param: iterate over all MachineSets in openshift-machine-api. + ParamRef: &admissionregistrationv1.ParamRef{ + Selector: &metav1.LabelSelector{}, + Namespace: openMachineAPINamespace, + ParameterNotFoundAction: &vapParamNotFoundAllow, + }, + // Deny if any validation fails. + ValidationActions: []admissionregistrationv1.ValidationAction{ + vapDenyAction, + }, + }, + } +} diff --git a/pkg/webhooks/vap_test.go b/pkg/webhooks/vap_test.go new file mode 100644 index 000000000..d6f72b5ff --- /dev/null +++ b/pkg/webhooks/vap_test.go @@ -0,0 +1,260 @@ +package webhooks + +import ( + "testing" + + . "github.com/onsi/gomega" + admissionregistrationv1 "k8s.io/api/admissionregistration/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" +) + +func TestNewVSphereFailureDomainMachineVAP(t *testing.T) { + g := NewWithT(t) + + policy := NewVSphereFailureDomainMachineVAP() + g.Expect(policy).NotTo(BeNil()) + g.Expect(policy.Name).To(Equal(VAPMachineFailureDomainName)) + + spec := policy.Spec + + // ParamKind must reference Machine. + g.Expect(spec.ParamKind).NotTo(BeNil()) + g.Expect(spec.ParamKind.APIVersion).To(Equal("machine.openshift.io/v1beta1")) + g.Expect(spec.ParamKind.Kind).To(Equal("Machine")) + + // Must fire only on UPDATE of infrastructures. + g.Expect(spec.MatchConstraints).NotTo(BeNil()) + g.Expect(spec.MatchConstraints.ResourceRules).To(HaveLen(1)) + rule := spec.MatchConstraints.ResourceRules[0] + g.Expect(rule.APIGroups).To(ConsistOf("config.openshift.io")) + g.Expect(rule.APIVersions).To(ConsistOf("v1")) + g.Expect(rule.Resources).To(ConsistOf("infrastructures")) + g.Expect(rule.Operations).To(ConsistOf(admissionregistrationv1.Update)) + + // Must have the platform match condition. + g.Expect(spec.MatchConditions).To(HaveLen(1)) + g.Expect(spec.MatchConditions[0].Name).To(Equal("is-vsphere-platform")) + g.Expect(spec.MatchConditions[0].Expression).To(ContainSubstring(`"VSphere"`)) + + // Must define the three CEL variables. + varNames := make([]string, 0, len(spec.Variables)) + for _, v := range spec.Variables { + varNames = append(varNames, v.Name) + } + g.Expect(varNames).To(ConsistOf("fds", "machineRegion", "machineZone")) + + // Must have exactly one validation rule. + g.Expect(spec.Validations).To(HaveLen(1)) + validation := spec.Validations[0] + g.Expect(validation.Expression).To(ContainSubstring("variables.machineRegion")) + g.Expect(validation.Expression).To(ContainSubstring("variables.machineZone")) + g.Expect(validation.Expression).To(ContainSubstring("variables.fds.exists")) + g.Expect(validation.MessageExpression).To(ContainSubstring("params.metadata.name")) + g.Expect(validation.Reason).NotTo(BeNil()) + g.Expect(*validation.Reason).To(Equal(metav1.StatusReasonInvalid)) + + // Failure policy must be Fail. + g.Expect(spec.FailurePolicy).NotTo(BeNil()) + g.Expect(*spec.FailurePolicy).To(Equal(admissionregistrationv1.Fail)) +} + +func TestNewVSphereFailureDomainMachineVAPBinding(t *testing.T) { + g := NewWithT(t) + + binding := NewVSphereFailureDomainMachineVAPBinding() + g.Expect(binding).NotTo(BeNil()) + g.Expect(binding.Name).To(Equal(VAPMachineFailureDomainName)) + + spec := binding.Spec + g.Expect(spec.PolicyName).To(Equal(VAPMachineFailureDomainName)) + + // ParamRef must select all Machines in openshift-machine-api. + g.Expect(spec.ParamRef).NotTo(BeNil()) + g.Expect(spec.ParamRef.Namespace).To(Equal(openMachineAPINamespace)) + g.Expect(spec.ParamRef.Selector).To(Equal(&metav1.LabelSelector{})) + g.Expect(spec.ParamRef.ParameterNotFoundAction).NotTo(BeNil()) + g.Expect(*spec.ParamRef.ParameterNotFoundAction).To(Equal(admissionregistrationv1.AllowAction)) + + // Enforcement must be Deny. + g.Expect(spec.ValidationActions).To(ConsistOf(admissionregistrationv1.Deny)) +} + +func TestNewVSphereFailureDomainCPMSVAP(t *testing.T) { + g := NewWithT(t) + + policy := NewVSphereFailureDomainCPMSVAP() + g.Expect(policy).NotTo(BeNil()) + g.Expect(policy.Name).To(Equal(VAPCPMSFailureDomainName)) + + spec := policy.Spec + + // ParamKind must reference ControlPlaneMachineSet. + g.Expect(spec.ParamKind).NotTo(BeNil()) + g.Expect(spec.ParamKind.APIVersion).To(Equal("machine.openshift.io/v1")) + g.Expect(spec.ParamKind.Kind).To(Equal("ControlPlaneMachineSet")) + + // Must fire only on UPDATE of infrastructures. + g.Expect(spec.MatchConstraints).NotTo(BeNil()) + g.Expect(spec.MatchConstraints.ResourceRules).To(HaveLen(1)) + rule := spec.MatchConstraints.ResourceRules[0] + g.Expect(rule.APIGroups).To(ConsistOf("config.openshift.io")) + g.Expect(rule.Resources).To(ConsistOf("infrastructures")) + g.Expect(rule.Operations).To(ConsistOf(admissionregistrationv1.Update)) + + // Must have the platform match condition. + g.Expect(spec.MatchConditions).To(HaveLen(1)) + g.Expect(spec.MatchConditions[0].Name).To(Equal("is-vsphere-platform")) + + // Must define the two CEL variables. + varNames := make([]string, 0, len(spec.Variables)) + for _, v := range spec.Variables { + varNames = append(varNames, v.Name) + } + g.Expect(varNames).To(ConsistOf("fds", "cpmsFDs")) + + // The cpmsFDs variable must reference the correct CPMS template field path. + for _, v := range spec.Variables { + if v.Name == "cpmsFDs" { + g.Expect(v.Expression).To(ContainSubstring("machines_v1beta1_machine_openshift_io")) + g.Expect(v.Expression).To(ContainSubstring("failureDomains")) + g.Expect(v.Expression).To(ContainSubstring("vsphere")) + } + } + + // Must have exactly one validation rule. + g.Expect(spec.Validations).To(HaveLen(1)) + validation := spec.Validations[0] + g.Expect(validation.Expression).To(ContainSubstring("variables.cpmsFDs")) + g.Expect(validation.Expression).To(ContainSubstring("variables.fds.exists")) + g.Expect(validation.MessageExpression).To(ContainSubstring("params.metadata.name")) + g.Expect(validation.Reason).NotTo(BeNil()) + g.Expect(*validation.Reason).To(Equal(metav1.StatusReasonInvalid)) + + // Failure policy must be Fail. + g.Expect(spec.FailurePolicy).NotTo(BeNil()) + g.Expect(*spec.FailurePolicy).To(Equal(admissionregistrationv1.Fail)) +} + +func TestNewVSphereFailureDomainCPMSVAPBinding(t *testing.T) { + g := NewWithT(t) + + binding := NewVSphereFailureDomainCPMSVAPBinding() + g.Expect(binding).NotTo(BeNil()) + g.Expect(binding.Name).To(Equal(VAPCPMSFailureDomainName)) + + spec := binding.Spec + g.Expect(spec.PolicyName).To(Equal(VAPCPMSFailureDomainName)) + + // ParamRef must select all CPMSes in openshift-machine-api. + g.Expect(spec.ParamRef).NotTo(BeNil()) + g.Expect(spec.ParamRef.Namespace).To(Equal(openMachineAPINamespace)) + g.Expect(spec.ParamRef.Selector).To(Equal(&metav1.LabelSelector{})) + g.Expect(spec.ParamRef.ParameterNotFoundAction).NotTo(BeNil()) + g.Expect(*spec.ParamRef.ParameterNotFoundAction).To(Equal(admissionregistrationv1.AllowAction)) + + // Enforcement must be Deny. + g.Expect(spec.ValidationActions).To(ConsistOf(admissionregistrationv1.Deny)) +} + +func TestNewVSphereFailureDomainMachineSetVAP(t *testing.T) { + g := NewWithT(t) + + policy := NewVSphereFailureDomainMachineSetVAP() + g.Expect(policy).NotTo(BeNil()) + g.Expect(policy.Name).To(Equal(VAPMachineSetFailureDomainName)) + + spec := policy.Spec + + // ParamKind must reference MachineSet. + g.Expect(spec.ParamKind).NotTo(BeNil()) + g.Expect(spec.ParamKind.APIVersion).To(Equal("machine.openshift.io/v1beta1")) + g.Expect(spec.ParamKind.Kind).To(Equal("MachineSet")) + + // Must fire only on UPDATE of infrastructures. + g.Expect(spec.MatchConstraints).NotTo(BeNil()) + g.Expect(spec.MatchConstraints.ResourceRules).To(HaveLen(1)) + rule := spec.MatchConstraints.ResourceRules[0] + g.Expect(rule.APIGroups).To(ConsistOf("config.openshift.io")) + g.Expect(rule.APIVersions).To(ConsistOf("v1")) + g.Expect(rule.Resources).To(ConsistOf("infrastructures")) + g.Expect(rule.Operations).To(ConsistOf(admissionregistrationv1.Update)) + + // Must have the platform match condition. + g.Expect(spec.MatchConditions).To(HaveLen(1)) + g.Expect(spec.MatchConditions[0].Name).To(Equal("is-vsphere-platform")) + g.Expect(spec.MatchConditions[0].Expression).To(ContainSubstring(`"VSphere"`)) + + // Must define the three CEL variables. + varNames := make([]string, 0, len(spec.Variables)) + for _, v := range spec.Variables { + varNames = append(varNames, v.Name) + } + g.Expect(varNames).To(ConsistOf("fds", "msRegion", "msZone")) + + // The msRegion and msZone variables must read from the template labels path using optional chaining. + for _, v := range spec.Variables { + switch v.Name { + case "msRegion": + g.Expect(v.Expression).To(ContainSubstring("params.?spec.template.metadata.labels")) + g.Expect(v.Expression).To(ContainSubstring(machineRegionLabel)) + case "msZone": + g.Expect(v.Expression).To(ContainSubstring("params.?spec.template.metadata.labels")) + g.Expect(v.Expression).To(ContainSubstring(machineZoneLabel)) + } + } + + // Must have exactly one validation rule. + g.Expect(spec.Validations).To(HaveLen(1)) + validation := spec.Validations[0] + g.Expect(validation.Expression).To(ContainSubstring("variables.msRegion")) + g.Expect(validation.Expression).To(ContainSubstring("variables.msZone")) + g.Expect(validation.Expression).To(ContainSubstring("variables.fds.exists")) + g.Expect(validation.MessageExpression).To(ContainSubstring("params.metadata.name")) + g.Expect(validation.Reason).NotTo(BeNil()) + g.Expect(*validation.Reason).To(Equal(metav1.StatusReasonInvalid)) + + // Failure policy must be Fail. + g.Expect(spec.FailurePolicy).NotTo(BeNil()) + g.Expect(*spec.FailurePolicy).To(Equal(admissionregistrationv1.Fail)) +} + +func TestNewVSphereFailureDomainMachineSetVAPBinding(t *testing.T) { + g := NewWithT(t) + + binding := NewVSphereFailureDomainMachineSetVAPBinding() + g.Expect(binding).NotTo(BeNil()) + g.Expect(binding.Name).To(Equal(VAPMachineSetFailureDomainName)) + + spec := binding.Spec + g.Expect(spec.PolicyName).To(Equal(VAPMachineSetFailureDomainName)) + + // ParamRef must select all MachineSets in openshift-machine-api. + g.Expect(spec.ParamRef).NotTo(BeNil()) + g.Expect(spec.ParamRef.Namespace).To(Equal(openMachineAPINamespace)) + g.Expect(spec.ParamRef.Selector).To(Equal(&metav1.LabelSelector{})) + g.Expect(spec.ParamRef.ParameterNotFoundAction).NotTo(BeNil()) + g.Expect(*spec.ParamRef.ParameterNotFoundAction).To(Equal(admissionregistrationv1.AllowAction)) + + // Enforcement must be Deny. + g.Expect(spec.ValidationActions).To(ConsistOf(admissionregistrationv1.Deny)) +} + +// TestVAPNamesAreConsistent ensures the binding policy names match the policy names. +func TestVAPNamesAreConsistent(t *testing.T) { + g := NewWithT(t) + + machineVAP := NewVSphereFailureDomainMachineVAP() + machineBinding := NewVSphereFailureDomainMachineVAPBinding() + g.Expect(machineBinding.Spec.PolicyName).To(Equal(machineVAP.Name), + "Machine binding PolicyName must match Machine VAP name") + + cpmsVAP := NewVSphereFailureDomainCPMSVAP() + cpmsBinding := NewVSphereFailureDomainCPMSVAPBinding() + g.Expect(cpmsBinding.Spec.PolicyName).To(Equal(cpmsVAP.Name), + "CPMS binding PolicyName must match CPMS VAP name") + + machineSetVAP := NewVSphereFailureDomainMachineSetVAP() + machineSetBinding := NewVSphereFailureDomainMachineSetVAPBinding() + g.Expect(machineSetBinding.Spec.PolicyName).To(Equal(machineSetVAP.Name), + "MachineSet binding PolicyName must match MachineSet VAP name") +}