diff --git a/cmd/main.go b/cmd/main.go index 1d87d0d..e149c38 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -215,6 +215,16 @@ func main() { p2pEndpointDomain := os.Getenv("SEI_P2P_ENDPOINT_DOMAIN") + nlbTargetType := os.Getenv("SEI_NLB_TARGET_TYPE") + switch nlbTargetType { + case "": + nlbTargetType = nodedeploymentcontroller.DefaultNLBTargetType + case nodedeploymentcontroller.NLBTargetTypeIP, nodedeploymentcontroller.NLBTargetTypeInstance: + default: + setupLog.Error(nil, "Invalid SEI_NLB_TARGET_TYPE — expected \"ip\" or \"instance\"", "value", nlbTargetType) + os.Exit(1) + } + //nolint:staticcheck // migrating to events.EventRecorder API is a separate effort recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") if err := (&nodedeploymentcontroller.SeiNodeDeploymentReconciler{ @@ -226,6 +236,7 @@ func main() { GatewayDomain: platformCfg.GatewayDomain, GatewayPublicDomain: platformCfg.GatewayPublicDomain, P2PEndpointDomain: p2pEndpointDomain, + NLBTargetType: nlbTargetType, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { var assemblerNode *seiv1alpha1.SeiNode diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index 799160b..ce44420 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -46,6 +46,15 @@ type SeiNodeDeploymentReconciler struct { // from SEI_P2P_ENDPOINT_DOMAIN. Empty disables the P2P endpoint path. P2PEndpointDomain string + // NLBTargetType picks the AWS NLB target mode for per-pod P2P endpoint + // Services. "ip" registers pod IPs directly — correct on VPC-CNI + // clusters (prod, dev). "instance" routes via NodePort — required on + // clusters where pod IPs aren't VPC-routable (e.g. harbor, where + // Cilium cluster-pool uses 100.64.0.0/14). Wired from SEI_NLB_TARGET_TYPE + // in cmd/main.go, which defaults the field to DefaultNLBTargetType + // when the env var is unset — the reconciler reads this field as-is. + NLBTargetType string + // PlanExecutor drives group-level task plans (e.g. genesis assembly). PlanExecutor planner.PlanExecutor[*seiv1alpha1.SeiNodeDeployment] } diff --git a/internal/controller/nodedeployment/p2p_endpoint.go b/internal/controller/nodedeployment/p2p_endpoint.go index dd9b0a3..deb7457 100644 --- a/internal/controller/nodedeployment/p2p_endpoint.go +++ b/internal/controller/nodedeployment/p2p_endpoint.go @@ -35,9 +35,18 @@ const ( awsLBCrossZoneAnnotation = "service.beta.kubernetes.io/aws-load-balancer-attributes" awsLBTypeValue = "external" awsLBSchemeValue = "internet-facing" - awsLBTargetTypeValue = "ip" awsLBCrossZoneAttributeStr = "load_balancing.cross_zone.enabled=true" + // NLBTargetTypeIP registers pod IPs directly with the NLB target group. + // Correct on VPC-CNI clusters where pod IPs are VPC-routable. + NLBTargetTypeIP = "ip" + // NLBTargetTypeInstance routes NLB traffic via the node's NodePort and + // relies on kube-proxy / Cilium socket-LB to forward to the pod. + // Required on clusters where pod IPs aren't VPC-routable. + NLBTargetTypeInstance = "instance" + // DefaultNLBTargetType matches the prior hardcoded behaviour. + DefaultNLBTargetType = NLBTargetTypeIP + p2pEndpointServiceSuffix = "-p2p" ) @@ -85,7 +94,7 @@ func p2pEndpointServiceName(group *seiv1alpha1.SeiNodeDeployment, ordinal int) s func (r *SeiNodeDeploymentReconciler) reconcileP2PEndpoints(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { desiredNames := make(map[string]struct{}, group.Spec.Replicas) for i := range int(group.Spec.Replicas) { - desired := generateP2PEndpointService(group, i, r.p2pEndpointHostname(group, i)) + desired := generateP2PEndpointService(group, i, r.p2pEndpointHostname(group, i), r.NLBTargetType) desired.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Service")) if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { return fmt.Errorf("setting owner reference on P2P endpoint Service %s: %w", desired.Name, err) @@ -181,7 +190,7 @@ func (r *SeiNodeDeploymentReconciler) deleteP2PEndpointForOrdinal(ctx context.Co return nil } -func generateP2PEndpointService(group *seiv1alpha1.SeiNodeDeployment, ordinal int, hostname string) *corev1.Service { +func generateP2PEndpointService(group *seiv1alpha1.SeiNodeDeployment, ordinal int, hostname, targetType string) *corev1.Service { name := p2pEndpointServiceName(group, ordinal) childName := seiNodeName(group, ordinal) @@ -195,11 +204,22 @@ func generateP2PEndpointService(group *seiv1alpha1.SeiNodeDeployment, ordinal in externalDNSHostnameAnnotation: hostname, awsLBTypeAnnotation: awsLBTypeValue, awsLBSchemeAnnotation: awsLBSchemeValue, - awsLBTargetTypeAnnotation: awsLBTargetTypeValue, + awsLBTargetTypeAnnotation: targetType, awsLBCrossZoneAnnotation: awsLBCrossZoneAttributeStr, managedByAnnotation: controllerName, } + // target-type=ip registers pod IPs directly; NodePort is not used so + // we explicitly disable allocation to preserve the limited 30000-32767 + // range. target-type=instance requires a NodePort for the NLB→node→pod + // hop (kube-proxy / Cilium socket-LB), so leave allocation at the + // kube default (true) by passing nil. + var allocateNodePorts *bool + if targetType == NLBTargetTypeIP { + f := false + allocateNodePorts = &f + } + return &corev1.Service{ ObjectMeta: metav1.ObjectMeta{ Name: name, @@ -210,7 +230,7 @@ func generateP2PEndpointService(group *seiv1alpha1.SeiNodeDeployment, ordinal in Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeLoadBalancer, ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, - AllocateLoadBalancerNodePorts: new(bool), + AllocateLoadBalancerNodePorts: allocateNodePorts, Selector: map[string]string{ noderesource.NodeLabel: childName, }, diff --git a/internal/controller/nodedeployment/p2p_endpoint_test.go b/internal/controller/nodedeployment/p2p_endpoint_test.go index 46fce55..15d2906 100644 --- a/internal/controller/nodedeployment/p2p_endpoint_test.go +++ b/internal/controller/nodedeployment/p2p_endpoint_test.go @@ -126,7 +126,7 @@ func TestGeneratePublishableService_Annotations(t *testing.T) { snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic, Namespace: "sei-test-1"}, } - svc := generateP2PEndpointService(snd, 0, "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io") + svc := generateP2PEndpointService(snd, 0, "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", NLBTargetTypeIP) g.Expect(svc.Annotations).To(HaveKeyWithValue( "external-dns.alpha.kubernetes.io/hostname", @@ -147,4 +147,24 @@ func TestGeneratePublishableService_Annotations(t *testing.T) { g.Expect(svc.Spec.Ports).To(HaveLen(1)) g.Expect(svc.Spec.Ports[0].Port).To(Equal(seiconfig.PortP2P)) g.Expect(svc.Spec.Selector).To(HaveKeyWithValue(noderesource.NodeLabel, "atlantic-2-0")) + + // target-type=ip → NodePort allocation explicitly disabled (preserve + // the 30000-32767 range; pod IP is the NLB target, no NodePort hop). + g.Expect(svc.Spec.AllocateLoadBalancerNodePorts).NotTo(BeNil()) + g.Expect(*svc.Spec.AllocateLoadBalancerNodePorts).To(BeFalse()) +} + +func TestGeneratePublishableService_InstanceTargetType(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic, Namespace: "sei-test-1"}, + } + svc := generateP2PEndpointService(snd, 0, "atlantic-2-0-p2p.atlantic-2.harbor.platform.sei.io", NLBTargetTypeInstance) + + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type", "instance")) + + // target-type=instance needs a NodePort for the NLB→node→pod hop. + // Leaving the field nil lets kube default to true. + g.Expect(svc.Spec.AllocateLoadBalancerNodePorts).To(BeNil()) }