Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
16 changes: 16 additions & 0 deletions install/0000_30_machine-api-operator_09_rbac.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -472,13 +472,29 @@ rules:
resources:
- validatingwebhookconfigurations
- mutatingwebhookconfigurations
- validatingadmissionpolicies
- validatingadmissionpolicybindings
verbs:
- get
- list
- watch
- create
- update

# The machine-api-operator service account must be able to read Machines,
# ControlPlaneMachineSets, and MachineSets so the kube-apiserver can resolve
# them as VAP params when validating Infrastructure/cluster updates.
Comment on lines +484 to +486

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Really? Is that documented somewhere in the upstream docs you can reference?

I can see why it would make sense, but I've never thought about it like that before

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah, so during work w/ coderabbit and claude, i tried to make namespace specific but that always seemed to fail. my only guess is because the infra spec is cluster scope. Claude made this comment based on what we learned and this note would help future me for sure remember this. I'll see if I can find any doc on this.

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Claude's response:

The requirement is documented in the official Kubernetes docs on Validating Admission Policies, in the "Parameter resources" section:

https://kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/#parameter-resources

The relevant excerpt (stable since Kubernetes 1.30 when VAP went GA) states:

The user creating the ValidatingAdmissionPolicy object must have get permission on all objects of the referenced paramKind.

This is also visible in the error message text itself — "must have 'get' permission on all objects of the referenced paramKind" — which is emitted directly by the kube-apiserver's VAP admission handler.

For the KEP (Kubernetes Enhancement Proposal) that introduced this design decision, the background is in:

https://github.com/kubernetes/enhancements/tree/master/keps/sig-api-machinery/3488-cel-admission-control

Specifically the Authorization section of KEP-3488, which describes why the creator must be authorized to read param objects (to prevent privilege escalation — a user shouldn't be able to create a policy that reads objects they don't have access to).


TL;DR for the team: The Kubernetes docs page at kubernetes.io/docs/reference/access-authn-authz/validating-admission-policy/#parameter-resources is the canonical reference. The rule is: whoever creates (or updates) the ValidatingAdmissionPolicy must have get on the paramKind resource, enforced by the kube-apiserver at policy creation time — which is why the machine-api-operator service account needs the permission before it can apply the VAP.

- apiGroups:
- machine.openshift.io
resources:
- machines
- controlplanemachinesets
- machinesets
verbs:
- get
- list
- watch

---
apiVersion: rbac.authorization.k8s.io/v1
kind: ClusterRole
Expand Down
4 changes: 3 additions & 1 deletion pkg/operator/operator.go
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,7 @@ type Operator struct {
mutatingWebhookListerSynced cache.InformerSynced

featureGateAccessor featuregates.FeatureGateAccess
featureGates featuregates.FeatureGate

// queue only ever has one item, but it has nice error handling backoff/retry semantics
queue workqueue.TypedRateLimitingInterface[string]
Expand Down Expand Up @@ -167,6 +168,7 @@ func New(
recorder,
)
featureGateAccessor.SetChangeHandler(func(featureChange featuregates.FeatureChange) {
optr.featureGates = featuregates.NewFeatureGate(featureChange.New.Enabled, featureChange.New.Disabled)
if featureChange.Previous == nil {
// When the initial featuregate is set, the previous version is nil.
// Nothing to do in this case, it's handled by the 1st sync, which only runs after the initial feature set was received.
Expand All @@ -176,7 +178,7 @@ func New(
klog.V(4).InfoS("FeatureGates changed", "enabled", featureChange.New.Enabled, "disabled", featureChange.New.Disabled)
prevDisableMHC := featuregates.NewFeatureGate(featureChange.Previous.Enabled, featureChange.Previous.Disabled).
Enabled(apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController)
newDisableMHC := featuregates.NewFeatureGate(featureChange.New.Enabled, featureChange.New.Disabled).
newDisableMHC := optr.featureGates.
Enabled(apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController)

if prevDisableMHC != newDisableMHC {
Expand Down
35 changes: 23 additions & 12 deletions pkg/operator/operator_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,11 @@ var (
{Name: apifeatures.FeatureGateAWSDedicatedHosts},
}

disabledFeatureGates = []openshiftv1.FeatureGateAttributes{
{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController},
{Name: apifeatures.FeatureGateVSphereMultiVCenterDay2},
}

enabledFeatureMap = map[string]bool{
"MachineAPIMigration": true,
"AzureWorkloadIdentity": true,
Expand Down Expand Up @@ -80,7 +85,7 @@ func newFakeOperator(kubeObjects, osObjects, machineObjects []runtime.Object, im
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand All @@ -91,6 +96,11 @@ func newFakeOperator(kubeObjects, osObjects, machineObjects []runtime.Object, im
return nil, fmt.Errorf("error adding event handler to deployments informer: %v", err)
}

featureGates, err := featureGateAccessor.CurrentFeatureGates()
if err != nil {
return nil, fmt.Errorf("error getting current feature gates: %v", err)
}

optr := &Operator{
kubeClient: kubeClient,
osClient: osClient,
Expand All @@ -102,6 +112,7 @@ func newFakeOperator(kubeObjects, osObjects, machineObjects []runtime.Object, im
mutatingWebhookLister: mutatingWebhookInformer.Lister(),
validatingWebhookLister: validatingWebhookInformer.Lister(),
featureGateAccessor: featureGateAccessor,
featureGates: featureGates,
imagesFile: imagesFile,
namespace: targetNamespace,
eventRecorder: record.NewFakeRecorder(50),
Expand Down Expand Up @@ -387,7 +398,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -423,7 +434,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -459,7 +470,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -495,7 +506,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -531,7 +542,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -567,7 +578,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -603,7 +614,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -639,7 +650,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -677,7 +688,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down Expand Up @@ -713,7 +724,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: append(enabledFeatureGates, openshiftv1.FeatureGateAttributes{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}),
Disabled: []openshiftv1.FeatureGateAttributes{},
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateVSphereMultiVCenterDay2}},
},
},
},
Expand Down Expand Up @@ -749,7 +760,7 @@ func TestMAOConfigFromInfrastructure(t *testing.T) {
{
Version: "",
Enabled: enabledFeatureGates,
Disabled: []openshiftv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
Disabled: disabledFeatureGates,
},
},
},
Expand Down
52 changes: 52 additions & 0 deletions pkg/operator/sync.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ import (
"sigs.k8s.io/controller-runtime/pkg/reconcile"

configv1 "github.com/openshift/api/config/v1"
apifeatures "github.com/openshift/api/features"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
utiltls "github.com/openshift/controller-runtime-common/pkg/tls"
libgocrypto "github.com/openshift/library-go/pkg/crypto"
Expand Down Expand Up @@ -268,6 +269,57 @@ func (optr *Operator) syncWebhookConfiguration(config *OperatorConfig) error {
return err
}
}
if config.PlatformType == configv1.VSpherePlatformType {
if err := optr.syncVSphereFailureDomainVAPs(); err != nil {
return err
}
}
return nil
}

// syncVSphereFailureDomainVAPs ensures that the ValidatingAdmissionPolicies and their
// bindings for protecting vSphere failure domains are present and up to date.
// These policies prevent an administrator from removing a failure domain from the
// Infrastructure/cluster CR while it is still referenced by Machines or
// ControlPlaneMachineSets managed by the Machine API Operator.
func (optr *Operator) syncVSphereFailureDomainVAPs() error {
if !optr.featureGates.Enabled(apifeatures.FeatureGateVSphereMultiVCenterDay2) {
return nil
}

recorder := events.NewLoggingEventRecorder(optr.name, clock.RealClock{})

if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyV1(context.TODO(),
optr.kubeClient.AdmissionregistrationV1(), recorder,
mapiwebhooks.NewVSphereFailureDomainMachineVAP(), optr.cache); err != nil {
return fmt.Errorf("failed to apply vSphere failure domain Machine ValidatingAdmissionPolicy: %w", err)
}
if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyBindingV1(context.TODO(),
optr.kubeClient.AdmissionregistrationV1(), recorder,
mapiwebhooks.NewVSphereFailureDomainMachineVAPBinding(), optr.cache); err != nil {
return fmt.Errorf("failed to apply vSphere failure domain Machine ValidatingAdmissionPolicyBinding: %w", err)
}
if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyV1(context.TODO(),
optr.kubeClient.AdmissionregistrationV1(), recorder,
mapiwebhooks.NewVSphereFailureDomainCPMSVAP(), optr.cache); err != nil {
return fmt.Errorf("failed to apply vSphere failure domain ControlPlaneMachineSet ValidatingAdmissionPolicy: %w", err)
}
if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyBindingV1(context.TODO(),
optr.kubeClient.AdmissionregistrationV1(), recorder,
mapiwebhooks.NewVSphereFailureDomainCPMSVAPBinding(), optr.cache); err != nil {
Comment thread
coderabbitai[bot] marked this conversation as resolved.
return fmt.Errorf("failed to apply vSphere failure domain ControlPlaneMachineSet ValidatingAdmissionPolicyBinding: %w", err)
}
if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyV1(context.TODO(),
optr.kubeClient.AdmissionregistrationV1(), recorder,
mapiwebhooks.NewVSphereFailureDomainMachineSetVAP(), optr.cache); err != nil {
return fmt.Errorf("failed to apply vSphere failure domain MachineSet ValidatingAdmissionPolicy: %w", err)
}
if _, _, err := resourceapply.ApplyValidatingAdmissionPolicyBindingV1(context.TODO(),
optr.kubeClient.AdmissionregistrationV1(), recorder,
mapiwebhooks.NewVSphereFailureDomainMachineSetVAPBinding(), optr.cache); err != nil {
return fmt.Errorf("failed to apply vSphere failure domain MachineSet ValidatingAdmissionPolicyBinding: %w", err)
}
Comment thread
vr4manta marked this conversation as resolved.

return nil
}

Expand Down
93 changes: 93 additions & 0 deletions pkg/operator/sync_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ import (

. "github.com/onsi/gomega"
configv1 "github.com/openshift/api/config/v1"
apifeatures "github.com/openshift/api/features"
machinev1beta1 "github.com/openshift/api/machine/v1beta1"
appsv1 "k8s.io/api/apps/v1"
corev1 "k8s.io/api/core/v1"
Expand Down Expand Up @@ -506,6 +507,98 @@ func TestSyncWebhookConfiguration(t *testing.T) {
}
}

func TestSyncVSphereFailureDomainVAPs(t *testing.T) {
vsphereMultiVCenterDay2Enabled := &configv1.FeatureGate{
ObjectMeta: metav1.ObjectMeta{Name: "cluster"},
Status: configv1.FeatureGateStatus{
FeatureGates: []configv1.FeatureGateDetails{
{
Enabled: append(enabledFeatureGates, configv1.FeatureGateAttributes{Name: apifeatures.FeatureGateVSphereMultiVCenterDay2}),
Disabled: []configv1.FeatureGateAttributes{{Name: apifeatures.FeatureGateMachineAPIOperatorDisableMachineHealthCheckController}},
},
},
},
}

testCases := []struct {
name string
platformType configv1.PlatformType
featureGate *configv1.FeatureGate
expectedNrValidatingAdmPolicies int
expectedNrValidatingAdmPBindings int
expectedPolicyNames []string
}{
{
name: "no VAPs on non-vSphere platform",
platformType: configv1.AWSPlatformType,
expectedNrValidatingAdmPolicies: 0,
expectedNrValidatingAdmPBindings: 0,
},
{
name: "no VAPs on vSphere platform when VSphereMultiVCenterDay2 is disabled",
platformType: configv1.VSpherePlatformType,
expectedNrValidatingAdmPolicies: 0,
expectedNrValidatingAdmPBindings: 0,
},
{
name: "three VAPs and three bindings on vSphere platform when VSphereMultiVCenterDay2 is enabled",
platformType: configv1.VSpherePlatformType,
featureGate: vsphereMultiVCenterDay2Enabled,
expectedNrValidatingAdmPolicies: 3,
expectedNrValidatingAdmPBindings: 3,
expectedPolicyNames: []string{
"vsphere-failure-domain-in-use-by-machine",
"vsphere-failure-domain-in-use-by-cpms",
"vsphere-failure-domain-in-use-by-machineset",
},
},
}

for _, tc := range testCases {
t.Run(tc.name, func(t *testing.T) {
g := NewWithT(t)

stopCh := make(chan struct{})
defer close(stopCh)
optr, err := newFakeOperator(nil, nil, nil, "", tc.featureGate, stopCh)
if err != nil {
t.Fatal(err)
}

err = optr.syncWebhookConfiguration(&OperatorConfig{PlatformType: tc.platformType})
g.Expect(err).ToNot(HaveOccurred())

policies, err := optr.kubeClient.AdmissionregistrationV1().
ValidatingAdmissionPolicies().List(t.Context(), metav1.ListOptions{})
g.Expect(err).ToNot(HaveOccurred())
g.Expect(policies.Items).To(HaveLen(tc.expectedNrValidatingAdmPolicies),
"unexpected number of ValidatingAdmissionPolicies")

bindings, err := optr.kubeClient.AdmissionregistrationV1().
ValidatingAdmissionPolicyBindings().List(t.Context(), metav1.ListOptions{})
g.Expect(err).ToNot(HaveOccurred())
g.Expect(bindings.Items).To(HaveLen(tc.expectedNrValidatingAdmPBindings),
"unexpected number of ValidatingAdmissionPolicyBindings")

if len(tc.expectedPolicyNames) > 0 {
policyNames := make([]string, 0, len(policies.Items))
for _, p := range policies.Items {
policyNames = append(policyNames, p.Name)
}
g.Expect(policyNames).To(ConsistOf(tc.expectedPolicyNames),
"unexpected ValidatingAdmissionPolicy names")

bindingPolicyRefs := make([]string, 0, len(bindings.Items))
for _, b := range bindings.Items {
bindingPolicyRefs = append(bindingPolicyRefs, b.Spec.PolicyName)
}
g.Expect(bindingPolicyRefs).To(ConsistOf(tc.expectedPolicyNames),
"binding PolicyNames must reference the created policies")
}
})
}
}

func TestCheckDaemonSetRolloutStatus(t *testing.T) {
testCases := []struct {
name string
Expand Down
Loading