From a76f0b4c7c00fd528343fce01cd240f07621dd3b Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 28 May 2026 14:59:05 -0700 Subject: [PATCH 1/6] feat(publishable-p2p): SND-owned per-pod NLB with Spec.ExternalAddress MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reframe of the publishable-P2P controller path. Closes prior PR #362. Architectural shape: - Networking is an SND-layer concern. The SND reconciler owns the per-pod LoadBalancer Service (one per child SeiNode); the SeiNode reconciler stays fully networking-agnostic — no parent lookup, no Service watch, no env reads. - ExternalAddress lives on SeiNodeSpec (not Status). The SND injects a deterministic vanity hostname at child Create time inside the ensureSeiNode diff; the planner reads from Spec. Race window dissolves because Spec is populated atomically at Create. - Vanity hostname: -p2p.., with the CNAME produced by the existing external-dns Service-source flow. ChainID gets a DNS-1123 pattern constraint to keep the hostname legal. - Standalone SeiNodes (no SND parent) may set Spec.ExternalAddress directly; the controller respects the user-set value. - SEI_VPC_CIDR is parsed once at controller startup. Unset/unparseable ⇒ PublishabilityAvailable=false; SNDs with Networking.TCP set surface ConditionNetworkingReady=False/VPCCIDRNotConfigured and skip the publishable path. AWS LBC events remain the ground-truth signal for any cluster-CNI mismatch. - Lifecycle: opt-in stamps Spec.ExternalAddress + creates Service; opt-out clears the field on existing children + deletes Services; scale-down deletes the per-ordinal Service before deleting the child SeiNode. Runtime opt toggles still require a manual pod delete to re-render config.toml (accepted v1 ops contract). - HTTPEnabled() call-site migration in nodedeployment/networking.go + status.go bundled (was a separate proposed PR; co-locating with the TCP branch keeps the gating consistent within a single review). New file: internal/controller/nodedeployment/publishable.go Removed (vs prior PR #362 plan): no external_address.go on SeiNode side, no annotation/Condition gate, no per-reconcile pod-IP routability check. Refs sei-protocol/sei-k8s-controller#360 Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/networking_types.go | 7 + api/v1alpha1/seinode_types.go | 28 +- api/v1alpha1/seinodedeployment_types.go | 34 +- api/v1alpha1/zz_generated.deepcopy.go | 25 + cmd/main.go | 38 +- config/crd/sei.io_seinodedeployments.yaml | 71 ++- config/crd/sei.io_seinodes.yaml | 31 +- .../controller/nodedeployment/controller.go | 16 + .../envtest/publishable_p2p_test.go | 470 ++++++++++++++++++ .../nodedeployment/envtest/suite_test.go | 32 +- .../controller/nodedeployment/networking.go | 68 ++- internal/controller/nodedeployment/nodes.go | 74 +++ .../controller/nodedeployment/publishable.go | 286 +++++++++++ .../nodedeployment/publishable_test.go | 255 ++++++++++ internal/controller/nodedeployment/status.go | 41 +- internal/planner/common_overrides_test.go | 15 +- internal/planner/planner.go | 4 +- manifests/sei.io_seinodedeployments.yaml | 71 ++- manifests/sei.io_seinodes.yaml | 31 +- 19 files changed, 1520 insertions(+), 77 deletions(-) create mode 100644 internal/controller/nodedeployment/envtest/publishable_p2p_test.go create mode 100644 internal/controller/nodedeployment/publishable.go create mode 100644 internal/controller/nodedeployment/publishable_test.go diff --git a/api/v1alpha1/networking_types.go b/api/v1alpha1/networking_types.go index 8bc440ba..305275b7 100644 --- a/api/v1alpha1/networking_types.go +++ b/api/v1alpha1/networking_types.go @@ -43,3 +43,10 @@ func (n *NetworkingConfig) HTTPEnabled() bool { } return n.TCP == nil } + +// TCPEnabled reports whether L4 NLB per-pod P2P exposure is requested. +// Mirrors HTTPEnabled but does not carry the legacy `networking: {}` +// back-compat — TCP must be explicitly opted into. +func (n *NetworkingConfig) TCPEnabled() bool { + return n != nil && n.TCP != nil +} diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index bc09216e..8364cadf 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -13,7 +13,12 @@ import ( // +kubebuilder:validation:XValidation:rule="!has(self.replayer) || (has(self.peers) && size(self.peers) > 0)",message="peers is required when replayer mode is set" type SeiNodeSpec struct { // ChainID of the chain this node belongs to. + // Constrained to DNS-1123 label characters because the controller composes + // it into publishable hostnames (e.g. `-p2p..`) when + // the parent SND opts into TCP networking; the address is a one-way door + // once peers cache it. // +kubebuilder:validation:MinLength=1 + // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` ChainID string `json:"chainId"` // Image is the seid container image. @@ -68,6 +73,21 @@ type SeiNodeSpec struct { // +optional Validator *ValidatorSpec `json:"validator,omitempty"` + // ExternalAddress is the routable P2P address — bare host:port, no + // nodeId@ prefix — written into seid's `p2p.external_address`. Two + // ownership models share this field: + // + // - SND-managed: when the parent SeiNodeDeployment has + // `Spec.Networking.TCP` set, the SND reconciler injects this + // value at child Create time (deterministic vanity hostname) and + // re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on + // the parent clears this field on every child. + // - Standalone: a SeiNode created without an SND parent may set + // this directly; the planner emits it verbatim. The controller + // never overwrites a standalone SeiNode's value. + // +optional + ExternalAddress *string `json:"externalAddress,omitempty"` + // Paused freezes reconciliation. While true, the controller does not // advance the lifecycle, start plans, or mutate derived resources // except the owned StatefulSet — which scales to Replicas=0 so pods @@ -341,14 +361,6 @@ type SeiNodeStatus struct { // +optional ResolvedPeers []string `json:"resolvedPeers,omitempty"` - // ExternalAddress is the routable P2P address for this node — bare - // host:port, no nodeId@ prefix. Populated by the SeiNode controller - // from the per-pod LoadBalancer Service when the parent SND has - // Spec.Networking.TCP set; consumed by the planner to set - // p2p.external_address in CometBFT config. - // +optional - ExternalAddress string `json:"externalAddress,omitempty"` - // StatefulSet references the StatefulSet the controller created for // this SeiNode. UID is the identity check: an STS with the expected // name but a different UID is not the one this controller created diff --git a/api/v1alpha1/seinodedeployment_types.go b/api/v1alpha1/seinodedeployment_types.go index dfe066fa..7d9dc755 100644 --- a/api/v1alpha1/seinodedeployment_types.go +++ b/api/v1alpha1/seinodedeployment_types.go @@ -71,9 +71,13 @@ type UpdateStrategy struct { // GenesisCeremonyConfig configures genesis ceremony orchestration for a node group. type GenesisCeremonyConfig struct { // ChainID for the new network. + // Constrained to DNS-1123 label characters because child SeiNodes + // compose it into publishable hostnames when the SND has + // `spec.networking.tcp` set; the address is a one-way door once peers + // cache it. // +kubebuilder:validation:MinLength=1 // +kubebuilder:validation:MaxLength=64 - // +kubebuilder:validation:Pattern=`^[a-z0-9][a-z0-9-]*[a-z0-9]$` + // +kubebuilder:validation:Pattern=`^[a-z0-9]([-a-z0-9]*[a-z0-9])?$` ChainID string `json:"chainId"` // StakingAmount is the amount each validator self-delegates in its gentx. @@ -343,6 +347,17 @@ type NetworkingStatus struct { // Routes lists the HTTPRoute hostnames managed by this deployment. // +optional Routes []RouteStatus `json:"routes,omitempty"` + + // PublishableEndpoints lists the per-ordinal publishable P2P + // hostnames stamped by the SND when `spec.networking.tcp` is set. + // Each entry mirrors the value injected into the child SeiNode's + // `spec.externalAddress` (hostname:port). Hostnames are deterministic + // from the SND identity, so this slice is populated without reading + // Service status back. + // +listType=map + // +listMapKey=ordinal + // +optional + PublishableEndpoints []PublishableEndpoint `json:"publishableEndpoints,omitempty"` } // RouteStatus is the observed state of a single HTTPRoute hostname. @@ -355,6 +370,23 @@ type RouteStatus struct { Protocol string `json:"protocol,omitempty"` } +// PublishableEndpoint is the observed state of one child's publishable +// P2P endpoint. Stamped by the SND networking reconciler. +type PublishableEndpoint struct { + // Ordinal is the child's replica index within the SND + // (matches `sei.io/nodedeployment-ordinal`). + Ordinal int32 `json:"ordinal"` + + // SeiNodeName is the child SeiNode resource name (also the + // per-pod headless Service name). + SeiNodeName string `json:"seiNodeName"` + + // Hostname is the vanity host:port advertised to peers via + // `p2p.external_address`. Equals the value injected into the + // child SeiNode's `spec.externalAddress`. + Hostname string `json:"hostname"` +} + // RolloutStatus tracks an in-progress rollout. Presence on the parent // SeiNodeDeployment is the single source of truth for "rollout in // progress" — `Status.Rollout != nil` and the `RolloutInProgress` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index c94106b3..6f46b691 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -466,6 +466,11 @@ func (in *NetworkingStatus) DeepCopyInto(out *NetworkingStatus) { *out = make([]RouteStatus, len(*in)) copy(*out, *in) } + if in.PublishableEndpoints != nil { + in, out := &in.PublishableEndpoints, &out.PublishableEndpoints + *out = make([]PublishableEndpoint, len(*in)) + copy(*out, *in) + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new NetworkingStatus. @@ -633,6 +638,21 @@ func (in *PlannedTask) DeepCopy() *PlannedTask { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *PublishableEndpoint) DeepCopyInto(out *PublishableEndpoint) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PublishableEndpoint. +func (in *PublishableEndpoint) DeepCopy() *PublishableEndpoint { + if in == nil { + return nil + } + out := new(PublishableEndpoint) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ReplayerSpec) DeepCopyInto(out *ReplayerSpec) { *out = *in @@ -1027,6 +1047,11 @@ func (in *SeiNodeSpec) DeepCopyInto(out *SeiNodeSpec) { *out = new(ValidatorSpec) (*in).DeepCopyInto(*out) } + if in.ExternalAddress != nil { + in, out := &in.ExternalAddress, &out.ExternalAddress + *out = new(string) + **out = **in + } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeiNodeSpec. diff --git a/cmd/main.go b/cmd/main.go index d3199fbf..8a5c394f 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -5,6 +5,7 @@ import ( "crypto/tls" "flag" "fmt" + "net" "net/http" "os" "sort" @@ -213,16 +214,39 @@ func main() { os.Exit(1) } + // Publishable-P2P capability: SEI_VPC_CIDR is a deployment-time + // constant (a VPC CIDR rarely changes within a cluster's life). + // Parse once at startup. When unset or unparseable, the SND + // reconciler still runs — TCP-enabled SNDs surface + // `ConditionNetworkingReady=False/VPCCIDRNotConfigured` on each + // reconcile and no LB Services are created. This is fail-closed at + // the apply boundary, not at admission; an admission webhook is + // deferred until we accumulate more SND-side validation needs. + vpcCIDRRaw := os.Getenv("SEI_VPC_CIDR") + publishabilityAvailable := false + var publishableVPCCIDR *net.IPNet + if vpcCIDRRaw == "" { + setupLog.Info("Publishable-P2P capability disabled: SEI_VPC_CIDR not set") + } else if _, cidr, err := net.ParseCIDR(vpcCIDRRaw); err != nil { + setupLog.Error(err, "Publishable-P2P capability disabled: SEI_VPC_CIDR is unparseable", "value", vpcCIDRRaw) + } else { + publishabilityAvailable = true + publishableVPCCIDR = cidr + setupLog.Info("Publishable-P2P capability enabled", "cidr", cidr.String()) + } + //nolint:staticcheck // migrating to events.EventRecorder API is a separate effort recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") if err := (&nodedeploymentcontroller.SeiNodeDeploymentReconciler{ - Client: kc, - Scheme: mgr.GetScheme(), - Recorder: recorder, - GatewayName: platformCfg.GatewayName, - GatewayNamespace: platformCfg.GatewayNamespace, - GatewayDomain: platformCfg.GatewayDomain, - GatewayPublicDomain: platformCfg.GatewayPublicDomain, + Client: kc, + Scheme: mgr.GetScheme(), + Recorder: recorder, + GatewayName: platformCfg.GatewayName, + GatewayNamespace: platformCfg.GatewayNamespace, + GatewayDomain: platformCfg.GatewayDomain, + GatewayPublicDomain: platformCfg.GatewayPublicDomain, + PublishabilityAvailable: publishabilityAvailable, + PublishableVPCCIDR: publishableVPCCIDR, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { var assemblerNode *seiv1alpha1.SeiNode diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index b16679c9..7aab588b 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -106,10 +106,15 @@ spec: type: object type: array chainId: - description: ChainID for the new network. + description: |- + ChainID for the new network. + Constrained to DNS-1123 label characters because child SeiNodes + compose it into publishable hostnames when the SND has + `spec.networking.tcp` set; the address is a one-way door once peers + cache it. maxLength: 64 minLength: 1 - pattern: ^[a-z0-9][a-z0-9-]*[a-z0-9]$ + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string maxCeremonyDuration: description: |- @@ -222,8 +227,14 @@ spec: type: object type: object chainId: - description: ChainID of the chain this node belongs to. + description: |- + ChainID of the chain this node belongs to. + Constrained to DNS-1123 label characters because the controller composes + it into publishable hostnames (e.g. `-p2p..`) when + the parent SND opts into TCP networking; the address is a one-way door + once peers cache it. minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string dataVolume: description: |- @@ -261,6 +272,21 @@ spec: x-kubernetes-validations: - message: import cannot be unset once configured rule: (!has(oldSelf.import) || has(self.import)) + externalAddress: + description: |- + ExternalAddress is the routable P2P address — bare host:port, no + nodeId@ prefix — written into seid's `p2p.external_address`. Two + ownership models share this field: + + - SND-managed: when the parent SeiNodeDeployment has + `Spec.Networking.TCP` set, the SND reconciler injects this + value at child Create time (deterministic vanity hostname) and + re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on + the parent clears this field on every child. + - Standalone: a SeiNode created without an SND parent may set + this directly; the planner emits it verbatim. The controller + never overwrites a standalone SeiNode's value. + type: string fullNode: description: FullNode configures a chain-following full node (absorbs the "rpc" role). @@ -1072,6 +1098,45 @@ spec: description: NetworkingStatus reports the observed state of networking resources. properties: + publishableEndpoints: + description: |- + PublishableEndpoints lists the per-ordinal publishable P2P + hostnames stamped by the SND when `spec.networking.tcp` is set. + Each entry mirrors the value injected into the child SeiNode's + `spec.externalAddress` (hostname:port). Hostnames are deterministic + from the SND identity, so this slice is populated without reading + Service status back. + items: + description: |- + PublishableEndpoint is the observed state of one child's publishable + P2P endpoint. Stamped by the SND networking reconciler. + properties: + hostname: + description: |- + Hostname is the vanity host:port advertised to peers via + `p2p.external_address`. Equals the value injected into the + child SeiNode's `spec.externalAddress`. + type: string + ordinal: + description: |- + Ordinal is the child's replica index within the SND + (matches `sei.io/nodedeployment-ordinal`). + format: int32 + type: integer + seiNodeName: + description: |- + SeiNodeName is the child SeiNode resource name (also the + per-pod headless Service name). + type: string + required: + - hostname + - ordinal + - seiNodeName + type: object + type: array + x-kubernetes-list-map-keys: + - ordinal + x-kubernetes-list-type: map routes: description: Routes lists the HTTPRoute hostnames managed by this deployment. diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 81106584..0683bd2a 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -88,8 +88,14 @@ spec: type: object type: object chainId: - description: ChainID of the chain this node belongs to. + description: |- + ChainID of the chain this node belongs to. + Constrained to DNS-1123 label characters because the controller composes + it into publishable hostnames (e.g. `-p2p..`) when + the parent SND opts into TCP networking; the address is a one-way door + once peers cache it. minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string dataVolume: description: |- @@ -127,6 +133,21 @@ spec: x-kubernetes-validations: - message: import cannot be unset once configured rule: (!has(oldSelf.import) || has(self.import)) + externalAddress: + description: |- + ExternalAddress is the routable P2P address — bare host:port, no + nodeId@ prefix — written into seid's `p2p.external_address`. Two + ownership models share this field: + + - SND-managed: when the parent SeiNodeDeployment has + `Spec.Networking.TCP` set, the SND reconciler injects this + value at child Create time (deterministic vanity hostname) and + re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on + the parent clears this field on every child. + - Standalone: a SeiNode created without an SND parent may set + this directly; the planner emits it verbatim. The controller + never overwrites a standalone SeiNode's value. + type: string fullNode: description: FullNode configures a chain-following full node (absorbs the "rpc" role). @@ -800,14 +821,6 @@ spec: Parent controllers compare this against spec.image to determine whether a spec change has been fully actuated. type: string - externalAddress: - description: |- - ExternalAddress is the routable P2P address for this node — bare - host:port, no nodeId@ prefix. Populated by the SeiNode controller - from the per-pod LoadBalancer Service when the parent SND has - Spec.Networking.TCP set; consumed by the planner to set - p2p.external_address in CometBFT config. - type: string phase: description: Phase is the high-level lifecycle state. enum: diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index 7590af64..a7d9bc30 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -3,6 +3,7 @@ package nodedeployment import ( "context" "fmt" + "net" "time" corev1 "k8s.io/api/core/v1" @@ -42,6 +43,21 @@ type SeiNodeDeploymentReconciler struct { GatewayDomain string GatewayPublicDomain string + // PublishabilityAvailable is the cluster-level capability flag for the + // publishable-P2P path. Set to true at startup only when SEI_VPC_CIDR + // parses cleanly; gates Service apply in the TCP networking branch. + // When false, an SND with `Spec.Networking.TCP` set surfaces + // `ConditionNetworkingReady=False/VPCCIDRNotConfigured` and no LB + // Services are created. + PublishabilityAvailable bool + + // PublishableVPCCIDR is the cluster's VPC CIDR, parsed from + // SEI_VPC_CIDR at startup. Held for future hooks; the reconcile path + // does not check Pod IPs against it — AWS LBC surfaces target + // registration failures via Service events if the cluster's Pod IP + // range is outside this CIDR. + PublishableVPCCIDR *net.IPNet + // PlanExecutor drives group-level task plans (e.g. genesis assembly). PlanExecutor planner.PlanExecutor[*seiv1alpha1.SeiNodeDeployment] } diff --git a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go b/internal/controller/nodedeployment/envtest/publishable_p2p_test.go new file mode 100644 index 00000000..29ff98a5 --- /dev/null +++ b/internal/controller/nodedeployment/envtest/publishable_p2p_test.go @@ -0,0 +1,470 @@ +//go:build envtest + +package envtest_test + +import ( + "fmt" + "testing" + "time" + + . "github.com/onsi/gomega" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/controller/nodedeployment/envtest/fixtures" +) + +const ( + // publishableTestDomain is the value the publishable tests pin onto + // `testSNDReconciler.GatewayPublicDomain` for the duration of each + // test. The HTTP-route tests rely on the default empty value; + // `withPublishabilityEnabled` restores both fields in t.Cleanup so + // later tests see the unmodified suite state. + publishableTestDomain = "test.platform.sei.io" +) + +// expectedPublishableHost composes the deterministic vanity host the +// SND should stamp for a given SND name + child ordinal. Mirrors the +// production template (`-p2p..`) +// so the tests fail loudly if either side drifts. +func expectedPublishableHost(sndName, chainID string, ordinal int) string { + return fmt.Sprintf("%s-%d-p2p.%s.%s", sndName, ordinal, chainID, publishableTestDomain) +} + +func expectedPublishableAddr(sndName, chainID string, ordinal int) string { + return expectedPublishableHost(sndName, chainID, ordinal) + ":26656" +} + +// withPublishabilityEnabled stamps the publishable test fixtures onto +// the shared SND reconciler and restores the original values when the +// test finishes. Tests that need a different capability state (e.g. +// VPCCIDRNotConfigured) call this with available=false. +func withPublishabilityEnabled(t *testing.T, available bool) { + t.Helper() + prevAvail := testSNDReconciler.PublishabilityAvailable + prevDomain := testSNDReconciler.GatewayPublicDomain + testSNDReconciler.PublishabilityAvailable = available + testSNDReconciler.GatewayPublicDomain = publishableTestDomain + t.Cleanup(func() { + testSNDReconciler.PublishabilityAvailable = prevAvail + testSNDReconciler.GatewayPublicDomain = prevDomain + }) +} + +// withTCP enables `spec.networking.tcp` on a fixtures-built SND. Goes +// here (not in fixtures/) because TCP-only is a publishable-test +// concept; the broader fixture surface stays minimal. +func withTCP() fixtures.Option { + return func(snd *seiv1alpha1.SeiNodeDeployment) { + if snd.Spec.Networking == nil { + snd.Spec.Networking = &seiv1alpha1.NetworkingConfig{} + } + snd.Spec.Networking.TCP = &seiv1alpha1.TCPConfig{} + } +} + +// TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists is +// the happy path. An SND with `networking.tcp` set should produce: a +// child SeiNode whose `Spec.ExternalAddress` matches the deterministic +// vanity host, a per-pod LB Service with the right annotations, and an +// entry in `Status.NetworkingStatus.PublishableEndpoints`. +func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, true) + ns := makeNamespace(t) + + snd := fixtures.NewSND(ns, "pub-create", + fixtures.WithReplicas(1), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + const chainID = "pacific-1" // fixtures default + wantHost := expectedPublishableHost(snd.Name, chainID, 0) + wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + + // 1. Child SeiNode is created with Spec.ExternalAddress already set. + // The brief calls out "child appears in the API with the value + // already populated" — this poll asserts the no-race-window claim. + childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + }, "child SeiNode "+childKey.Name+" populated with publishable ExternalAddress") + + // 2. Per-pod LB Service exists with the expected annotations. + svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} + waitFor(t, func() bool { + svc := &corev1.Service{} + return testCli.Get(testCtx, svcKey, svc) == nil + }, "publishable Service "+svcKey.Name+" created") + + svc := &corev1.Service{} + g.Expect(testCli.Get(testCtx, svcKey, svc)).To(Succeed()) + g.Expect(svc.Spec.Type).To(Equal(corev1.ServiceTypeLoadBalancer)) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "external-dns.alpha.kubernetes.io/hostname", wantHost)) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-type", "external")) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-scheme", "internet-facing")) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type", "ip")) + // Selector targets the child SeiNode's pod via `sei.io/node`. + g.Expect(svc.Spec.Selector).To(HaveKeyWithValue("sei.io/node", snd.Name+"-0")) + + // 3. Owner reference is the SND (not the child) so opt-out and SND + // deletion both reap the Service through the same path. + refs := svc.GetOwnerReferences() + g.Expect(refs).NotTo(BeEmpty()) + g.Expect(refs[0].UID).To(Equal(snd.UID)) + g.Expect(refs[0].Controller).NotTo(BeNil()) + g.Expect(*refs[0].Controller).To(BeTrue()) + + // 4. PublishableEndpoints surfaces the same hostname the child got. + waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { + ns := latest.Status.NetworkingStatus + if ns == nil || len(ns.PublishableEndpoints) != 1 { + return false + } + ep := ns.PublishableEndpoints[0] + return ep.Ordinal == 0 && + ep.SeiNodeName == snd.Name+"-0" && + ep.Hostname == wantAddr + }, "PublishableEndpoints stamped with deterministic vanity host") +} + +// TestPublishableP2P_OptOut_ClearsAddressAndDeletesService asserts the +// opt-out lifecycle: removing TCP from `spec.networking` clears the +// child's `Spec.ExternalAddress` and deletes the per-pod LB Service. +// ensureSeiNode is the single write site for the child field, so this +// validates the diff propagation path end-to-end. +func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, true) + ns := makeNamespace(t) + + snd := fixtures.NewSND(ns, "pub-optout", + fixtures.WithReplicas(1), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + const chainID = "pacific-1" + wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} + svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} + + // Wait for converged publishable state. + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + }, "child has publishable address before opt-out") + waitFor(t, func() bool { + return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil + }, "publishable Service exists before opt-out") + + // Opt out: clear TCP. The SND retains an empty `networking` block + // (HTTPEnabled() back-compat will treat it as HTTP-enabled, but the + // validator template has no externally-routable HTTP protocols so + // nothing else changes). To get a clean "no networking" state, set + // Networking to nil. + latest := getSND(t, client.ObjectKeyFromObject(snd)) + patch := client.MergeFrom(latest.DeepCopy()) + latest.Spec.Networking = nil + g.Expect(testCli.Patch(testCtx, latest, patch)).To(Succeed()) + + // Child Spec.ExternalAddress is cleared by ensureSeiNode's diff. + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress == nil || *child.Spec.ExternalAddress == "" + }, "child ExternalAddress cleared after opt-out") + + // Per-pod LB Service is deleted. + waitFor(t, func() bool { + err := testCli.Get(testCtx, svcKey, &corev1.Service{}) + return apierrors.IsNotFound(err) + }, "publishable Service deleted after opt-out") + + // PublishableEndpoints is cleared (or NetworkingStatus is nil). + waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { + return latest.Status.NetworkingStatus == nil || + len(latest.Status.NetworkingStatus.PublishableEndpoints) == 0 + }, "PublishableEndpoints cleared after opt-out") +} + +// TestPublishableP2P_ReOptIn_RestoresAddressAndService is the dual of +// the opt-out test: re-adding TCP after a clear should bring the child +// back to the same vanity hostname (deterministic) and recreate the +// per-pod LB Service. +func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, true) + ns := makeNamespace(t) + + snd := fixtures.NewSND(ns, "pub-reoptin", + fixtures.WithReplicas(1), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + const chainID = "pacific-1" + wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} + svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} + + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + }, "initial converge") + + // Opt out. + latest := getSND(t, client.ObjectKeyFromObject(snd)) + patch := client.MergeFrom(latest.DeepCopy()) + latest.Spec.Networking = nil + g.Expect(testCli.Patch(testCtx, latest, patch)).To(Succeed()) + waitFor(t, func() bool { + err := testCli.Get(testCtx, svcKey, &corev1.Service{}) + return apierrors.IsNotFound(err) + }, "Service gone after opt-out") + + // Re-opt-in. + latest = getSND(t, client.ObjectKeyFromObject(snd)) + patch = client.MergeFrom(latest.DeepCopy()) + latest.Spec.Networking = &seiv1alpha1.NetworkingConfig{TCP: &seiv1alpha1.TCPConfig{}} + g.Expect(testCli.Patch(testCtx, latest, patch)).To(Succeed()) + + // Child gets the same vanity address back (identity is stable). + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + }, "child re-stamped with same vanity address after re-opt-in") + + // Service is recreated. + waitFor(t, func() bool { + return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil + }, "publishable Service recreated after re-opt-in") +} + +// TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild guards +// the invariant called out in the brief: on scale-down the ordinal's +// publishable Service must be deleted before the child SeiNode so the +// NLB never sits in the no-healthy-targets-but-still-allocated state. +// +// envtest is happy-path here — both deletions are issued by the same +// reconcile pass, so we mainly assert both objects converge to gone. +// The ordering predicate runs inside `scaleDown`. +func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, true) + ns := makeNamespace(t) + + snd := fixtures.NewSND(ns, "pub-scale", + fixtures.WithReplicas(2), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + // Wait for both replicas to converge with their publishable Services. + for i := 0; i < 2; i++ { + svcKey := types.NamespacedName{Name: fmt.Sprintf("%s-%d-p2p", snd.Name, i), Namespace: ns} + waitFor(t, func() bool { + return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil + }, fmt.Sprintf("publishable Service %s created", svcKey.Name)) + } + + // Scale down to 1 replica. + latest := getSND(t, client.ObjectKeyFromObject(snd)) + patch := client.MergeFrom(latest.DeepCopy()) + latest.Spec.Replicas = 1 + g.Expect(testCli.Patch(testCtx, latest, patch)).To(Succeed()) + + // ordinal-1 Service is gone. + scaledOutKey := types.NamespacedName{Name: snd.Name + "-1-p2p", Namespace: ns} + waitFor(t, func() bool { + err := testCli.Get(testCtx, scaledOutKey, &corev1.Service{}) + return apierrors.IsNotFound(err) + }, "ordinal-1 publishable Service deleted on scale-down") + + // ordinal-0 Service survives. + survivingKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} + g.Consistently(func() error { + return testCli.Get(testCtx, survivingKey, &corev1.Service{}) + }, 2*time.Second, 200*time.Millisecond).Should(Succeed()) +} + +// TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress asserts the +// brief's "no SND involvement" contract. A SeiNode created directly by +// a user with `Spec.ExternalAddress` set should keep that value through +// reconcile — no SND-side code path should clear it, because no SND +// owns it. The planner emits it verbatim. The check is interface-only +// (we read back the spec); the planner emit-side is covered by the +// unit test in `internal/planner/common_overrides_test.go`. +func TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, true) + ns := makeNamespace(t) + + addr := "custom.example.com:26656" + node := &seiv1alpha1.SeiNode{ + ObjectMeta: metav1.ObjectMeta{ + Name: "standalone", + Namespace: ns, + }, + Spec: seiv1alpha1.SeiNodeSpec{ + ChainID: "pacific-1", + Image: fixtures.DefaultImage, + ExternalAddress: &addr, + FullNode: &seiv1alpha1.FullNodeSpec{}, + }, + } + g.Expect(testCli.Create(testCtx, node)).To(Succeed()) + + // Read back: the spec persists. No SND code path should touch this. + g.Consistently(func() *string { + fetched := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, client.ObjectKeyFromObject(node), fetched); err != nil { + return nil + } + return fetched.Spec.ExternalAddress + }, 2*time.Second, 200*time.Millisecond).Should(And( + Not(BeNil()), + HaveValue(Equal(addr)), + )) +} + +// TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode +// exercises the brief's stomp scenario: an external write clears the +// child's `Spec.ExternalAddress` mid-flight. ensureSeiNode must detect +// the diff on next reconcile and restore the SND-managed value. This +// is the load-bearing claim that the field truly converges through +// the single write site. +func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, true) + ns := makeNamespace(t) + + snd := fixtures.NewSND(ns, "pub-stomp", + fixtures.WithReplicas(1), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + const chainID = "pacific-1" + wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} + + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + }, "initial converge before stomp") + + // Simulate `kubectl edit seinode` clearing the field. The SND + // reconciler's ensureSeiNode pass should re-stamp it. Patch through + // any finalizer-induced 409s with a small retry — the goal is to + // land the cleared value at the apiserver, not to assert on the + // PATCH path itself. + g.Eventually(func() error { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return err + } + patch := client.MergeFrom(child.DeepCopy()) + child.Spec.ExternalAddress = nil + return testCli.Patch(testCtx, child, patch) + }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) + + // Bump the SND so a fresh reconcile is queued (envtest's predicate + // gates on generation changes for SND-side spec). A no-op label edit + // is enough. + g.Eventually(func() error { + latest := getSND(t, client.ObjectKeyFromObject(snd)) + patch := client.MergeFrom(latest.DeepCopy()) + if latest.Annotations == nil { + latest.Annotations = map[string]string{} + } + latest.Annotations["test.sei.io/stomp-retick"] = time.Now().Format(time.RFC3339Nano) + return testCli.Patch(testCtx, latest, patch) + }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) + + // Reconverge. + waitFor(t, func() bool { + child := &seiv1alpha1.SeiNode{} + if err := testCli.Get(testCtx, childKey, child); err != nil { + return false + } + return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + }, "child reconverged via ensureSeiNode after external stomp") +} + +// TestPublishableP2P_VPCCIDRNotConfigured_SurfacesConditionAndSkipsService +// is the capability-disabled branch. With `PublishabilityAvailable=false` +// and a TCP-enabled SND, the reconciler must: +// - set ConditionNetworkingReady=False/VPCCIDRNotConfigured +// - not create a publishable Service +// - leave the child's Spec.ExternalAddress nil/empty +// +// This is the equivalent of an operator misconfigured the controller +// (SEI_VPC_CIDR missing). The condition is the operator's debugging +// signal. +func TestPublishableP2P_VPCCIDRNotConfigured_SurfacesConditionAndSkipsService(t *testing.T) { + g := NewWithT(t) + withPublishabilityEnabled(t, false) // capability OFF for this test + ns := makeNamespace(t) + + snd := fixtures.NewSND(ns, "pub-no-cidr", + fixtures.WithReplicas(1), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + // Condition surfaces the misconfiguration. + waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { + c := apimeta.FindStatusCondition(latest.Status.Conditions, seiv1alpha1.ConditionNetworkingReady) + return c != nil && c.Status == metav1.ConditionFalse && c.Reason == "VPCCIDRNotConfigured" + }, "ConditionNetworkingReady=False/VPCCIDRNotConfigured") + + // No publishable Service. + svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} + g.Consistently(func() bool { + err := testCli.Get(testCtx, svcKey, &corev1.Service{}) + return apierrors.IsNotFound(err) + }, 2*time.Second, 200*time.Millisecond).Should(BeTrue(), + "no publishable Service created when capability is unavailable") + + // Child SeiNode has no ExternalAddress. + childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} + g.Eventually(func() error { + child := &seiv1alpha1.SeiNode{} + return testCli.Get(testCtx, childKey, child) + }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) + child := &seiv1alpha1.SeiNode{} + g.Expect(testCli.Get(testCtx, childKey, child)).To(Succeed()) + if child.Spec.ExternalAddress != nil { + g.Expect(*child.Spec.ExternalAddress).To(BeEmpty(), + "child ExternalAddress must be empty without capability") + } +} diff --git a/internal/controller/nodedeployment/envtest/suite_test.go b/internal/controller/nodedeployment/envtest/suite_test.go index 9ac1b5a9..3b6e826d 100644 --- a/internal/controller/nodedeployment/envtest/suite_test.go +++ b/internal/controller/nodedeployment/envtest/suite_test.go @@ -46,6 +46,14 @@ var ( testCtx context.Context testCncl context.CancelFunc testFaker *envtestpkg.StatusFaker + + // testSNDReconciler is the live SND reconciler the manager runs. + // Exposed so tests that need to flip controller-level state (e.g. + // the publishable-P2P capability toggle) can do so without rebuilding + // the manager. Mutation must happen before the test creates the SND + // under test; controller-runtime serializes reconciles per-key, so + // reads during reconcile are safe. + testSNDReconciler *nodedeploymentcontroller.SeiNodeDeploymentReconciler ) func TestMain(m *testing.M) { @@ -187,14 +195,19 @@ func run(m *testing.M) (int, error) { // pure kube-client tasks and never call the sidecar. Genesis // ceremony tasks would; the InPlace fixtures don't trigger those. recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") //nolint:staticcheck // new events API migration is a separate effort - if err := (&nodedeploymentcontroller.SeiNodeDeploymentReconciler{ - Client: kc, - Scheme: mgr.GetScheme(), - Recorder: recorder, - GatewayName: "sei-gateway", - GatewayNamespace: "gateway", - GatewayDomain: "test.local", - GatewayPublicDomain: "", + // Default capability state: publishability enabled, but the public + // domain stays empty — the publishable-P2P tests flip both fields + // themselves before creating their SND so they don't perturb the + // HTTP-route tests' single-hostname expectations. + testSNDReconciler = &nodedeploymentcontroller.SeiNodeDeploymentReconciler{ + Client: kc, + Scheme: mgr.GetScheme(), + Recorder: recorder, + GatewayName: "sei-gateway", + GatewayNamespace: "gateway", + GatewayDomain: "test.local", + GatewayPublicDomain: "", + PublishabilityAvailable: true, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(_ context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { return task.ExecutionConfig{ @@ -207,7 +220,8 @@ func run(m *testing.M) (int, error) { } }, }, - }).SetupWithManager(mgr); err != nil { + } + if err := testSNDReconciler.SetupWithManager(mgr); err != nil { return 1, fmt.Errorf("setting up SeiNodeDeployment controller: %w", err) } diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index 3d7d7759..8e3ee3e4 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -39,11 +39,16 @@ type effectiveRoute struct { } // routeHostnameResolvable returns true when the deployment's first public -// hostname resolves in DNS, indicating the HTTPRoute + External-DNS pipeline -// is ready. Returns true when no routes are expected (private deployments, -// validator mode). +// HTTP hostname resolves in DNS, indicating the HTTPRoute + External-DNS +// pipeline is ready. Returns true when no HTTP routes are expected +// (TCP-only deployments, validator mode, private deployments). +// +// The publishable-P2P hostname has no parallel resolvability gate. CometBFT +// peers retry; the controller does not block STS creation on external-dns +// catching up — that would re-introduce the layering inversion the original +// PR #362 design suffered from. func (r *SeiNodeDeploymentReconciler) routeHostnameResolvable(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) bool { - if group.Spec.Networking == nil { + if !group.Spec.Networking.HTTPEnabled() { return true } routes := resolveEffectiveRoutes(group, r.GatewayDomain, r.GatewayPublicDomain) @@ -58,6 +63,18 @@ func (r *SeiNodeDeploymentReconciler) routeHostnameResolvable(ctx context.Contex return true } +// reconcileNetworking dispatches to two independent sub-reconcilers gated +// on independent presence checks. HTTP (L7 Gateway exposure) and TCP +// (per-pod L4 NLB for P2P) share only the group-level resource labels, +// the `fieldOwner` constant, and the `ConditionNetworkingReady` condition +// vocabulary; they do not nest. +// +// Condition contract: each branch sets `ConditionNetworkingReady` to +// reflect *its* terminal state. When both are enabled, the TCP branch +// runs second and its outcome wins; that's deliberate — TCP is the +// load-bearing path for publishable validators, and the operator wants +// to know its readiness first. When neither is enabled, +// `NetworkingDisabled/false` describes the steady state. func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { if group.Spec.Networking == nil { setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, @@ -65,12 +82,44 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g return r.deleteNetworkingResources(ctx, group) } - if err := r.reconcileExternalService(ctx, group); err != nil { - return fmt.Errorf("reconciling external service: %w", err) + if group.Spec.Networking.HTTPEnabled() { + if err := r.reconcileExternalService(ctx, group); err != nil { + return fmt.Errorf("reconciling external service: %w", err) + } + if err := r.reconcileRoute(ctx, group); err != nil { + return fmt.Errorf("reconciling route: %w", err) + } + } else { + if err := r.deleteExternalService(ctx, group); err != nil { + return fmt.Errorf("deleting external service: %w", err) + } + if err := r.deleteHTTPRoutesByLabel(ctx, group); err != nil { + return fmt.Errorf("deleting HTTPRoutes: %w", err) + } } - if err := r.reconcileRoute(ctx, group); err != nil { - return fmt.Errorf("reconciling route: %w", err) + + if group.Spec.Networking.TCPEnabled() { + if !r.PublishabilityAvailable { + setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, + "VPCCIDRNotConfigured", + "spec.networking.tcp requires SEI_VPC_CIDR to be set on the controller; publishable Services are not created") + // Defensive: clear any stale Services left from a prior boot + // where the env was set. Cheap list-and-delete; no-op when + // nothing is there. + if err := r.deletePublishableServices(ctx, group); err != nil { + return fmt.Errorf("deleting publishable services after capability loss: %w", err) + } + return nil + } + if err := r.reconcilePublishableServices(ctx, group); err != nil { + return fmt.Errorf("reconciling publishable services: %w", err) + } + } else { + if err := r.deletePublishableServices(ctx, group); err != nil { + return fmt.Errorf("deleting publishable services: %w", err) + } } + return nil } @@ -366,6 +415,9 @@ func (r *SeiNodeDeploymentReconciler) deleteNetworkingResources(ctx context.Cont if err := r.deleteHTTPRoutesByLabel(ctx, group); err != nil { return fmt.Errorf("deleting HTTPRoutes: %w", err) } + if err := r.deletePublishableServices(ctx, group); err != nil { + return fmt.Errorf("deleting publishable Services: %w", err) + } return nil } diff --git a/internal/controller/nodedeployment/nodes.go b/internal/controller/nodedeployment/nodes.go index ce325679..f45b68d8 100644 --- a/internal/controller/nodedeployment/nodes.go +++ b/internal/controller/nodedeployment/nodes.go @@ -175,6 +175,13 @@ func (r *SeiNodeDeploymentReconciler) populateIncumbentNodes(ctx context.Context func (r *SeiNodeDeploymentReconciler) ensureSeiNode(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { desired := generateSeiNode(group, ordinal) + // Inject the SND-managed external address after the pure-template + // portion is built. Kept out of `generateSeiNode` so the rest of + // the package (and the existing test surface) treats the function + // as data-only. + if addr := r.publishableExternalAddressForChild(group, ordinal); addr != "" { + desired.Spec.ExternalAddress = &addr + } if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { return fmt.Errorf("setting owner reference: %w", err) } @@ -218,17 +225,60 @@ func (r *SeiNodeDeploymentReconciler) ensureSeiNode(ctx context.Context, group * existing.Spec.PodLabels = desired.Spec.PodLabels updated = true } + // ExternalAddress is SND-managed when TCP is on, cleared when TCP is + // off, and untouched when the parent does not opt into networking. The + // diff here is the single write site that propagates the field — + // generateSeiNode handles creation, this handles updates and external + // stomps (kubectl edit, manual patches). + if !externalAddressEqual(existing.Spec.ExternalAddress, desired.Spec.ExternalAddress) { + existing.Spec.ExternalAddress = desired.Spec.ExternalAddress + updated = true + } if updated { return r.Update(ctx, existing) } return nil } +// externalAddressEqual treats nil and a pointer to the empty string as +// equivalent — both represent "no override." Without this normalization +// the diff in ensureSeiNode would oscillate between nil and *"" each +// reconcile when a child is created via generateSeiNode (which sets nil) +// and then read back from the API (which may also store nil). +func externalAddressEqual(a, b *string) bool { + av, bv := "", "" + if a != nil { + av = *a + } + if b != nil { + bv = *b + } + return av == bv +} + +// generateSeiNode produces the desired child SeiNode for a given ordinal. +// Pure: depends only on the SND spec and ordinal. The publishable +// external-address injection happens in `ensureSeiNode` via the +// reconciler-bound helper because it needs `r.GatewayPublicDomain` and +// `r.PublishabilityAvailable` — controller-state, not template-data. +// +// Template-aliasing guard: the deep-copied template's +// `Spec.ExternalAddress` is zeroed before any per-ordinal stamping. If +// we did not, a user who set `spec.template.spec.externalAddress: foo` +// on the SND would cause every child to be created with the same +// literal value, which would either silently break peer routing or — +// worse — succeed long enough to pollute peer caches before the +// SND-side reconciler corrected it. Standalone SeiNodes (no SND parent) +// still control the field directly; this guard only runs on the SND +// code path. func generateSeiNode(group *seiv1alpha1.SeiNodeDeployment, ordinal int) *seiv1alpha1.SeiNode { labels := seiNodeLabels(group, ordinal) annotations := seiNodeAnnotations(group) spec := group.Spec.Template.Spec.DeepCopy() + // Template-aliasing guard — see function doc. + spec.ExternalAddress = nil + podLabels := make(map[string]string) if group.Spec.Template.Metadata != nil { maps.Copy(podLabels, group.Spec.Template.Metadata.Labels) @@ -261,6 +311,21 @@ func generateSeiNode(group *seiv1alpha1.SeiNodeDeployment, ordinal int) *seiv1al } } +// publishableExternalAddressForChild returns the deterministic vanity +// address (host:port) when the SND opts into TCP networking and the +// controller has the capability flag. Returns "" otherwise. ensureSeiNode +// uses this to inject `Spec.ExternalAddress` on create and to drive the +// diff on update — single write site for the SND-managed value. +func (r *SeiNodeDeploymentReconciler) publishableExternalAddressForChild(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + if !group.Spec.Networking.TCPEnabled() { + return "" + } + if !r.PublishabilityAvailable { + return "" + } + return r.publishableExternalAddress(group, ordinal) +} + // scaleDown deletes SeiNodes with ordinals >= the desired replica count. func (r *SeiNodeDeploymentReconciler) scaleDown(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { if group.Spec.Replicas <= 0 { @@ -289,6 +354,15 @@ func (r *SeiNodeDeploymentReconciler) scaleDown(ctx context.Context, group *seiv continue } if ord >= int(group.Spec.Replicas) { + // Delete the per-ordinal publishable Service before the child + // SeiNode. Both are SND-owned, so owner-ref cascade would + // only fire on SND deletion — leaving the NLB stranded with + // no targets between scale-down and SND delete is the failure + // we explicitly avoid. Idempotent: no-op when publishability + // isn't in use. + if err := r.deletePublishableServiceForOrdinal(ctx, group, ord); err != nil { + return fmt.Errorf("deleting publishable Service for scaled-down ordinal %d: %w", ord, err) + } if err := r.Delete(ctx, node); err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("deleting excess SeiNode %s: %w", node.Name, err) } diff --git a/internal/controller/nodedeployment/publishable.go b/internal/controller/nodedeployment/publishable.go new file mode 100644 index 00000000..96c4b8cf --- /dev/null +++ b/internal/controller/nodedeployment/publishable.go @@ -0,0 +1,286 @@ +// Package nodedeployment — publishable.go is the SND-side networking sub-reconciler +// for per-pod P2P LoadBalancer Services. Owns the deterministic vanity-hostname +// derivation, the Service apply/delete lifecycle, and the population of +// `Status.NetworkingStatus.PublishableEndpoints`. The child SeiNode reconciler +// never reads from this file — it observes `Spec.ExternalAddress` and proceeds. +// +// Lifecycle invariants: +// +// - Hostnames are derived purely from SND identity (`Name`, `ChainID`, the +// controller's `GatewayPublicDomain`). No Service status read-back, no AWS +// NLB hostname plumbed into the child. +// - external-dns watches `Service` resources with the +// `external-dns.alpha.kubernetes.io/hostname` annotation and provisions a +// CNAME from the vanity name to the NLB's allocated DNS. The pod itself +// advertises the vanity name to peers via `p2p.external_address`; peers +// resolve the CNAME at dial time. We never block STS creation on this. +// - Owner ref is the SND, not the child. On scale-down we explicitly delete +// the per-ordinal Service before deleting the child SeiNode; on full SND +// deletion the owner-ref cascade handles cleanup. +// - When the cluster lacks a parseable `SEI_VPC_CIDR`, this entire sub-path +// is bypassed and `ConditionNetworkingReady = False / VPCCIDRNotConfigured`. +// The controller never per-reconcile-introspects Pod IPs — misconfiguration +// surfaces as NLB target-registration failure events on the LB Service, +// which is the AWS-side ground truth. +package nodedeployment + +import ( + "context" + "fmt" + "strconv" + + seiconfig "github.com/sei-protocol/sei-config" + corev1 "k8s.io/api/core/v1" + apierrors "k8s.io/apimachinery/pkg/api/errors" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/apimachinery/pkg/types" + "k8s.io/apimachinery/pkg/util/intstr" + "k8s.io/apimachinery/pkg/util/validation" + ctrl "sigs.k8s.io/controller-runtime" + "sigs.k8s.io/controller-runtime/pkg/client" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" +) + +const ( + // publishableFieldOwner isolates the publishable Service apply path from + // the rest of the SND's SSA writes so the two paths cannot fight each other + // over field ownership. + publishableFieldOwner = client.FieldOwner("nodedeployment-controller-publishable") + + // publishableComponentLabel marks Services owned by this sub-reconciler so + // list-based cleanup can target them without entangling the HTTP path's + // external Service. + publishableComponentLabel = "sei.io/component" + publishableComponentValue = "p2p-lb" + + // externalDNSHostnameAnnotation is the external-dns annotation that drives + // CNAME creation from the vanity hostname to the NLB's allocated DNS name. + // Pinned key — changing it would orphan in-cluster DNS records. + externalDNSHostnameAnnotation = "external-dns.alpha.kubernetes.io/hostname" + + // AWS Load Balancer Controller annotations for an internet-facing NLB + // with IP targets and cross-zone load balancing. Hardcoded for the + // single use case in scope; revisit if per-cluster knobs land. + awsLBTypeAnnotation = "service.beta.kubernetes.io/aws-load-balancer-type" + awsLBSchemeAnnotation = "service.beta.kubernetes.io/aws-load-balancer-scheme" + awsLBTargetTypeAnnotation = "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type" + awsLBCrossZoneAnnotation = "service.beta.kubernetes.io/aws-load-balancer-attributes" + awsLBTypeValue = "external" + awsLBSchemeValue = "internet-facing" + awsLBTargetTypeValue = "ip" + awsLBCrossZoneAttributeStr = "load_balancing.cross_zone.enabled=true" + + // publishableServiceSuffix is appended to the child SeiNode name to form + // the per-pod LB Service name. Stable identifier — operators key dashboards + // and runbooks off it. + publishableServiceSuffix = "-p2p" +) + +// effectiveChainID returns the chain ID the SND will stamp into child +// SeiNodes. Mirrors the precedence used by `generateSeiNode`: the +// template's explicit value wins, otherwise fall back to the Genesis +// ceremony's ChainID. Validators always set template ChainID via the +// ceremony override at child create, but a non-validator SND has no +// Genesis block and relies entirely on the template value. +func effectiveChainID(group *seiv1alpha1.SeiNodeDeployment) string { + if id := group.Spec.Template.Spec.ChainID; id != "" { + return id + } + if group.Spec.Genesis != nil { + return group.Spec.Genesis.ChainID + } + return "" +} + +// publishableHostname is the deterministic vanity hostname for the given +// ordinal. Returns "" if the inputs cannot produce a valid DNS-1123 +// hostname (missing chain ID, missing public domain, or chain ID with +// DNS-illegal characters that slipped past admission). Callers must +// treat the empty string as "skip" — the apply boundary fails closed. +func (r *SeiNodeDeploymentReconciler) publishableHostname(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + if r.GatewayPublicDomain == "" { + return "" + } + chainID := effectiveChainID(group) + if chainID == "" { + return "" + } + // Belt-and-suspenders: the API patterns reject DNS-illegal IDs at + // admission, but admission rules can be bypassed (cluster CRD edits, + // older CRD schema in-cluster). Validate the chain ID's label shape + // before composing it into a host. + if errs := validation.IsDNS1123Label(chainID); len(errs) > 0 { + return "" + } + host := fmt.Sprintf("%s-p2p.%s.%s", + seiNodeName(group, ordinal), + chainID, + r.GatewayPublicDomain, + ) + return host +} + +// publishableExternalAddress returns ":" suitable for +// CometBFT's `p2p.external_address`. Empty when the hostname cannot be +// derived (see publishableHostname). +func (r *SeiNodeDeploymentReconciler) publishableExternalAddress(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + host := r.publishableHostname(group, ordinal) + if host == "" { + return "" + } + return fmt.Sprintf("%s:%d", host, seiconfig.PortP2P) +} + +// publishableServiceName is the per-pod LB Service name. Stable across +// SND edits — only the ordinal can shift it. +func publishableServiceName(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + return seiNodeName(group, ordinal) + publishableServiceSuffix +} + +// reconcilePublishableServices server-side-applies one LoadBalancer +// Service per desired ordinal and trims any orphan Services left over +// from scale-down events the controller missed. +func (r *SeiNodeDeploymentReconciler) reconcilePublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { + desiredNames := make(map[string]struct{}, group.Spec.Replicas) + for i := range int(group.Spec.Replicas) { + host := r.publishableHostname(group, i) + if host == "" { + // Capability gating happens upstream in reconcileNetworking; + // the only way to reach this branch is a chain ID that + // somehow escaped admission validation. Fail closed: set the + // stable False reason, do not apply. + setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, + "InvalidChainIDForDNS", + fmt.Sprintf("chainID %q is not a valid DNS-1123 label; cannot compose publishable hostname", effectiveChainID(group))) + return nil + } + desired := generatePublishableService(group, i, host) + desired.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Service")) + if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { + return fmt.Errorf("setting owner reference on publishable Service %s: %w", desired.Name, err) + } + //nolint:staticcheck // SSA apply via untyped object mirrors the rest of this controller; typed ApplyConfiguration is a separate effort + if err := r.Patch(ctx, desired, client.Apply, publishableFieldOwner, client.ForceOwnership); err != nil { + return fmt.Errorf("applying publishable Service %s: %w", desired.Name, err) + } + desiredNames[desired.Name] = struct{}{} + } + + if err := r.deleteOrphanPublishableServices(ctx, group, desiredNames); err != nil { + return fmt.Errorf("trimming orphan publishable Services: %w", err) + } + + setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionTrue, + "PublishableServicesApplied", + fmt.Sprintf("%d publishable Service(s) reconciled", len(desiredNames))) + return nil +} + +// deleteOrphanPublishableServices removes publishable Services labelled +// for this SND whose names are not in desiredNames. Used after a +// scale-down or replica reset to clear stranded LB Services. +func (r *SeiNodeDeploymentReconciler) deleteOrphanPublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, desiredNames map[string]struct{}) error { + list := &corev1.ServiceList{} + if err := r.List(ctx, list, + client.InNamespace(group.Namespace), + client.MatchingLabels{ + groupLabel: group.Name, + publishableComponentLabel: publishableComponentValue, + }, + ); err != nil { + return fmt.Errorf("listing publishable Services: %w", err) + } + for i := range list.Items { + svc := &list.Items[i] + if _, ok := desiredNames[svc.Name]; ok { + continue + } + if !metav1.IsControlledBy(svc, group) { + continue + } + if err := r.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting orphan publishable Service %s: %w", svc.Name, err) + } + } + return nil +} + +// deletePublishableServices removes every publishable Service owned by +// the SND. Called on opt-out (TCP unset on a previously-publishable SND) +// and as part of the general networking-resources cleanup path. +func (r *SeiNodeDeploymentReconciler) deletePublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { + return r.deleteOrphanPublishableServices(ctx, group, nil) +} + +// deletePublishableServiceForOrdinal is the explicit per-ordinal delete +// scaleDown calls before deleting the child SeiNode. Idempotent — +// silently no-ops when the Service is already gone. +func (r *SeiNodeDeploymentReconciler) deletePublishableServiceForOrdinal(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { + name := publishableServiceName(group, ordinal) + svc := &corev1.Service{} + err := r.Get(ctx, types.NamespacedName{Name: name, Namespace: group.Namespace}, svc) + if apierrors.IsNotFound(err) { + return nil + } + if err != nil { + return fmt.Errorf("fetching publishable Service %s for delete: %w", name, err) + } + if !metav1.IsControlledBy(svc, group) { + return nil + } + if err := r.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { + return fmt.Errorf("deleting publishable Service %s: %w", name, err) + } + return nil +} + +// generatePublishableService is a pure factory — given an SND, ordinal, +// and the deterministic vanity hostname, returns the desired Service +// spec. No external state, no API reads. The annotation set drives AWS +// LBC and external-dns; the selector reuses the SeiNode-controller's +// `sei.io/node=` label, which is stamped onto the pod by the +// StatefulSet template. +func generatePublishableService(group *seiv1alpha1.SeiNodeDeployment, ordinal int, hostname string) *corev1.Service { + name := publishableServiceName(group, ordinal) + childName := seiNodeName(group, ordinal) + + labels := map[string]string{ + groupLabel: group.Name, + groupOrdinalLabel: strconv.Itoa(ordinal), + publishableComponentLabel: publishableComponentValue, + } + + annotations := map[string]string{ + externalDNSHostnameAnnotation: hostname, + awsLBTypeAnnotation: awsLBTypeValue, + awsLBSchemeAnnotation: awsLBSchemeValue, + awsLBTargetTypeAnnotation: awsLBTargetTypeValue, + awsLBCrossZoneAnnotation: awsLBCrossZoneAttributeStr, + managedByAnnotation: controllerName, + } + + return &corev1.Service{ + ObjectMeta: metav1.ObjectMeta{ + Name: name, + Namespace: group.Namespace, + Labels: labels, + Annotations: annotations, + }, + Spec: corev1.ServiceSpec{ + Type: corev1.ServiceTypeLoadBalancer, + ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, + AllocateLoadBalancerNodePorts: new(bool), // explicit false; NLBs use IP targets, not NodePort routing + Selector: map[string]string{ + noderesource.NodeLabel: childName, + }, + Ports: []corev1.ServicePort{{ + Name: "p2p", + Port: seiconfig.PortP2P, + TargetPort: intstr.FromInt32(seiconfig.PortP2P), + Protocol: corev1.ProtocolTCP, + }}, + }, + } +} diff --git a/internal/controller/nodedeployment/publishable_test.go b/internal/controller/nodedeployment/publishable_test.go new file mode 100644 index 00000000..e65b4c2a --- /dev/null +++ b/internal/controller/nodedeployment/publishable_test.go @@ -0,0 +1,255 @@ +package nodedeployment + +import ( + "testing" + + . "github.com/onsi/gomega" + seiconfig "github.com/sei-protocol/sei-config" + corev1 "k8s.io/api/core/v1" + metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + + seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" + "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" +) + +func TestPublishableHostname_Table(t *testing.T) { + t.Parallel() + cases := []struct { + name string + sndName string + namespace string + templateChainID string + genesisChainID string + ordinal int + gatewayPubDomain string + want string + }{ + { + name: "atlantic-2 ordinal 0", + sndName: "atlantic-2", + namespace: "sei-test-1", + templateChainID: "atlantic-2", + ordinal: 0, + gatewayPubDomain: "prod.platform.sei.io", + want: "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", + }, + { + name: "ordinal 5 of multi-replica fleet", + sndName: "rpc", + namespace: "pacific-1", + templateChainID: "pacific-1", + ordinal: 5, + gatewayPubDomain: "prod.platform.sei.io", + want: "rpc-5-p2p.pacific-1.prod.platform.sei.io", + }, + { + name: "genesis-only chainID (validator SND)", + sndName: "newchain-validators", + namespace: "ceremony", + templateChainID: "", + genesisChainID: "newchain-1", + ordinal: 1, + gatewayPubDomain: "test.platform.sei.io", + want: "newchain-validators-1-p2p.newchain-1.test.platform.sei.io", + }, + { + name: "empty gateway public domain returns empty", + sndName: "atlantic-2", + namespace: "sei-test-1", + templateChainID: "atlantic-2", + ordinal: 0, + gatewayPubDomain: "", + want: "", + }, + { + name: "empty chain ID returns empty", + sndName: "atlantic-2", + namespace: "sei-test-1", + templateChainID: "", + ordinal: 0, + gatewayPubDomain: "prod.platform.sei.io", + want: "", + }, + { + name: "chainID with uppercase fails the DNS-1123 belt-and-suspenders", + sndName: "atlantic-2", + namespace: "sei-test-1", + templateChainID: "Atlantic-2", + ordinal: 0, + gatewayPubDomain: "prod.platform.sei.io", + want: "", + }, + { + name: "chainID with dot fails the DNS-1123 belt-and-suspenders", + sndName: "atlantic-2", + namespace: "sei-test-1", + templateChainID: "atlantic.2", + ordinal: 0, + gatewayPubDomain: "prod.platform.sei.io", + want: "", + }, + } + for _, tc := range cases { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: tc.sndName, Namespace: tc.namespace}, + Spec: seiv1alpha1.SeiNodeDeploymentSpec{ + Template: seiv1alpha1.SeiNodeTemplate{ + Spec: seiv1alpha1.SeiNodeSpec{ChainID: tc.templateChainID}, + }, + }, + } + if tc.genesisChainID != "" { + snd.Spec.Genesis = &seiv1alpha1.GenesisCeremonyConfig{ChainID: tc.genesisChainID} + } + r := &SeiNodeDeploymentReconciler{GatewayPublicDomain: tc.gatewayPubDomain} + g.Expect(r.publishableHostname(snd, tc.ordinal)).To(Equal(tc.want)) + }) + } +} + +func TestPublishableExternalAddress_AppendsP2PPort(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + Spec: seiv1alpha1.SeiNodeDeploymentSpec{ + Template: seiv1alpha1.SeiNodeTemplate{ + Spec: seiv1alpha1.SeiNodeSpec{ChainID: "atlantic-2"}, + }, + }, + } + r := &SeiNodeDeploymentReconciler{GatewayPublicDomain: "prod.platform.sei.io"} + got := r.publishableExternalAddress(snd, 0) + g.Expect(got).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) +} + +func TestPublishableExternalAddress_EmptyWhenHostnameRejected(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + } + r := &SeiNodeDeploymentReconciler{GatewayPublicDomain: "prod.platform.sei.io"} + g.Expect(r.publishableExternalAddress(snd, 0)).To(BeEmpty()) +} + +func TestPublishableServiceName(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + } + g.Expect(publishableServiceName(snd, 0)).To(Equal("atlantic-2-0-p2p")) + g.Expect(publishableServiceName(snd, 7)).To(Equal("atlantic-2-7-p2p")) +} + +func TestGeneratePublishableService_Shape(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + } + host := "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io" + + svc := generatePublishableService(snd, 0, host) + + g.Expect(svc.Name).To(Equal("atlantic-2-0-p2p")) + g.Expect(svc.Namespace).To(Equal("sei-test-1")) + + // Labels: SND group label, ordinal label, and the new component label + // so cleanup queries can target publishable Services without + // entangling other group-owned objects. + g.Expect(svc.Labels).To(HaveKeyWithValue(groupLabel, "atlantic-2")) + g.Expect(svc.Labels).To(HaveKeyWithValue(groupOrdinalLabel, "0")) + g.Expect(svc.Labels).To(HaveKeyWithValue(publishableComponentLabel, publishableComponentValue)) + + // Annotations: external-dns hostname pins the vanity → NLB CNAME; + // AWS LBC annotations select the NLB shape; managed-by is operator + // guidance, matching the HTTPRoute precedent. + g.Expect(svc.Annotations).To(HaveKeyWithValue(externalDNSHostnameAnnotation, host)) + g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBTypeAnnotation, awsLBTypeValue)) + g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBSchemeAnnotation, awsLBSchemeValue)) + g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBTargetTypeAnnotation, awsLBTargetTypeValue)) + g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBCrossZoneAnnotation, awsLBCrossZoneAttributeStr)) + g.Expect(svc.Annotations).To(HaveKeyWithValue(managedByAnnotation, controllerName)) + + g.Expect(svc.Spec.Type).To(Equal(corev1.ServiceTypeLoadBalancer)) + g.Expect(svc.Spec.ExternalTrafficPolicy).To(Equal(corev1.ServiceExternalTrafficPolicyTypeLocal)) + g.Expect(svc.Spec.AllocateLoadBalancerNodePorts).NotTo(BeNil()) + g.Expect(*svc.Spec.AllocateLoadBalancerNodePorts).To(BeFalse()) + + // Selector matches the child SeiNode's pod via the `sei.io/node` + // label the SeiNode controller stamps on its StatefulSet template. + // Using that label (rather than the SND group label) ensures the + // NLB has exactly one healthy target — the per-replica pod — + // instead of round-robining across siblings. + g.Expect(svc.Spec.Selector).To(Equal(map[string]string{ + noderesource.NodeLabel: "atlantic-2-0", + })) + + g.Expect(svc.Spec.Ports).To(HaveLen(1)) + g.Expect(svc.Spec.Ports[0].Port).To(Equal(seiconfig.PortP2P)) + g.Expect(svc.Spec.Ports[0].Name).To(Equal("p2p")) + g.Expect(svc.Spec.Ports[0].Protocol).To(Equal(corev1.ProtocolTCP)) +} + +func TestEffectiveChainID_GenesisFallback(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + Spec: seiv1alpha1.SeiNodeDeploymentSpec{ + Template: seiv1alpha1.SeiNodeTemplate{ + Spec: seiv1alpha1.SeiNodeSpec{ChainID: ""}, + }, + Genesis: &seiv1alpha1.GenesisCeremonyConfig{ChainID: "newchain-1"}, + }, + } + g.Expect(effectiveChainID(snd)).To(Equal("newchain-1")) +} + +func TestEffectiveChainID_TemplateWins(t *testing.T) { + g := NewWithT(t) + snd := &seiv1alpha1.SeiNodeDeployment{ + Spec: seiv1alpha1.SeiNodeDeploymentSpec{ + Template: seiv1alpha1.SeiNodeTemplate{ + Spec: seiv1alpha1.SeiNodeSpec{ChainID: "pacific-1"}, + }, + Genesis: &seiv1alpha1.GenesisCeremonyConfig{ChainID: "should-be-ignored"}, + }, + } + g.Expect(effectiveChainID(snd)).To(Equal("pacific-1")) +} + +func TestExternalAddressEqual_NormalizesNilAndEmpty(t *testing.T) { + g := NewWithT(t) + empty := "" + val := "host:26656" + other := "other:26656" + g.Expect(externalAddressEqual(nil, nil)).To(BeTrue()) + g.Expect(externalAddressEqual(nil, &empty)).To(BeTrue()) + g.Expect(externalAddressEqual(&empty, nil)).To(BeTrue()) + g.Expect(externalAddressEqual(&val, &val)).To(BeTrue()) + g.Expect(externalAddressEqual(&val, &other)).To(BeFalse()) + g.Expect(externalAddressEqual(nil, &val)).To(BeFalse()) + g.Expect(externalAddressEqual(&val, nil)).To(BeFalse()) +} + +func TestNetworkingConfig_TCPEnabled(t *testing.T) { + t.Parallel() + tests := []struct { + name string + cfg *seiv1alpha1.NetworkingConfig + want bool + }{ + {name: "nil receiver", cfg: nil, want: false}, + {name: "legacy empty struct does not enable TCP", cfg: &seiv1alpha1.NetworkingConfig{}, want: false}, + {name: "HTTP-only does not enable TCP", cfg: &seiv1alpha1.NetworkingConfig{HTTP: &seiv1alpha1.HTTPConfig{}}, want: false}, + {name: "explicit TCP enables TCP", cfg: &seiv1alpha1.NetworkingConfig{TCP: &seiv1alpha1.TCPConfig{}}, want: true}, + {name: "HTTP and TCP both set", cfg: &seiv1alpha1.NetworkingConfig{HTTP: &seiv1alpha1.HTTPConfig{}, TCP: &seiv1alpha1.TCPConfig{}}, want: true}, + } + for _, tc := range tests { + t.Run(tc.name, func(t *testing.T) { + t.Parallel() + g := NewWithT(t) + g.Expect(tc.cfg.TCPEnabled()).To(Equal(tc.want)) + }) + } +} diff --git a/internal/controller/nodedeployment/status.go b/internal/controller/nodedeployment/status.go index 1eef56bc..3d422555 100644 --- a/internal/controller/nodedeployment/status.go +++ b/internal/controller/nodedeployment/status.go @@ -103,20 +103,41 @@ func (r *SeiNodeDeploymentReconciler) buildNetworkingStatus(group *seiv1alpha1.S if group.Spec.Networking == nil { return nil } - routes := resolveEffectiveRoutes(group, r.GatewayDomain, r.GatewayPublicDomain) - if len(routes) == 0 { - return nil + + out := &seiv1alpha1.NetworkingStatus{} + + if group.Spec.Networking.HTTPEnabled() { + routes := resolveEffectiveRoutes(group, r.GatewayDomain, r.GatewayPublicDomain) + for _, er := range routes { + for _, h := range er.Hostnames { + out.Routes = append(out.Routes, seiv1alpha1.RouteStatus{ + Hostname: h, + Protocol: er.Protocol, + }) + } + } } - var rs []seiv1alpha1.RouteStatus - for _, er := range routes { - for _, h := range er.Hostnames { - rs = append(rs, seiv1alpha1.RouteStatus{ - Hostname: h, - Protocol: er.Protocol, + + if group.Spec.Networking.TCPEnabled() && r.PublishabilityAvailable { + for i := range int(group.Spec.Replicas) { + host := r.publishableExternalAddress(group, i) + if host == "" { + // publishableHostname rejected this ordinal (invalid + // chainID); skip the entry rather than stamp a bad one. + continue + } + out.PublishableEndpoints = append(out.PublishableEndpoints, seiv1alpha1.PublishableEndpoint{ + Ordinal: int32(i), + SeiNodeName: seiNodeName(group, i), + Hostname: host, }) } } - return &seiv1alpha1.NetworkingStatus{Routes: rs} + + if len(out.Routes) == 0 && len(out.PublishableEndpoints) == 0 { + return nil + } + return out } func setNodesReadyCondition(group *seiv1alpha1.SeiNodeDeployment, ready, desired int32, nodes []seiv1alpha1.SeiNode) { diff --git a/internal/planner/common_overrides_test.go b/internal/planner/common_overrides_test.go index c3f7d083..4ba2ebf9 100644 --- a/internal/planner/common_overrides_test.go +++ b/internal/planner/common_overrides_test.go @@ -5,6 +5,7 @@ import ( seiconfig "github.com/sei-protocol/sei-config" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" + "k8s.io/utils/ptr" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) @@ -12,8 +13,8 @@ import ( func TestCommonOverrides_WithExternalAddress(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, - Status: seiv1alpha1.SeiNodeStatus{ - ExternalAddress: "p2p.atlantic-2.seinetwork.io:26656", + Spec: seiv1alpha1.SeiNodeSpec{ + ExternalAddress: ptr.To("p2p.atlantic-2.seinetwork.io:26656"), }, } overrides := commonOverrides(node) @@ -45,16 +46,14 @@ func TestCommonOverrides_UserOverrideTakesPrecedence(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, Spec: seiv1alpha1.SeiNodeSpec{ - ChainID: "test-1", - Image: "seid:v1", - FullNode: &seiv1alpha1.FullNodeSpec{}, + ChainID: "test-1", + Image: "seid:v1", + FullNode: &seiv1alpha1.FullNodeSpec{}, + ExternalAddress: ptr.To("lb.address:26656"), Overrides: map[string]string{ seiconfig.KeyP2PExternalAddress: "custom.address:26656", }, }, - Status: seiv1alpha1.SeiNodeStatus{ - ExternalAddress: "lb.address:26656", - }, } common := commonOverrides(node) diff --git a/internal/planner/planner.go b/internal/planner/planner.go index c2c33bd0..3b40d284 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -694,8 +694,8 @@ func commonOverrides(node *seiv1alpha1.SeiNode) map[string]string { out := map[string]string{ "logging.level": "error", } - if node.Status.ExternalAddress != "" { - out[seiconfig.KeyP2PExternalAddress] = node.Status.ExternalAddress + if node.Spec.ExternalAddress != nil && *node.Spec.ExternalAddress != "" { + out[seiconfig.KeyP2PExternalAddress] = *node.Spec.ExternalAddress } return out } diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index b16679c9..7aab588b 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -106,10 +106,15 @@ spec: type: object type: array chainId: - description: ChainID for the new network. + description: |- + ChainID for the new network. + Constrained to DNS-1123 label characters because child SeiNodes + compose it into publishable hostnames when the SND has + `spec.networking.tcp` set; the address is a one-way door once peers + cache it. maxLength: 64 minLength: 1 - pattern: ^[a-z0-9][a-z0-9-]*[a-z0-9]$ + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string maxCeremonyDuration: description: |- @@ -222,8 +227,14 @@ spec: type: object type: object chainId: - description: ChainID of the chain this node belongs to. + description: |- + ChainID of the chain this node belongs to. + Constrained to DNS-1123 label characters because the controller composes + it into publishable hostnames (e.g. `-p2p..`) when + the parent SND opts into TCP networking; the address is a one-way door + once peers cache it. minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string dataVolume: description: |- @@ -261,6 +272,21 @@ spec: x-kubernetes-validations: - message: import cannot be unset once configured rule: (!has(oldSelf.import) || has(self.import)) + externalAddress: + description: |- + ExternalAddress is the routable P2P address — bare host:port, no + nodeId@ prefix — written into seid's `p2p.external_address`. Two + ownership models share this field: + + - SND-managed: when the parent SeiNodeDeployment has + `Spec.Networking.TCP` set, the SND reconciler injects this + value at child Create time (deterministic vanity hostname) and + re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on + the parent clears this field on every child. + - Standalone: a SeiNode created without an SND parent may set + this directly; the planner emits it verbatim. The controller + never overwrites a standalone SeiNode's value. + type: string fullNode: description: FullNode configures a chain-following full node (absorbs the "rpc" role). @@ -1072,6 +1098,45 @@ spec: description: NetworkingStatus reports the observed state of networking resources. properties: + publishableEndpoints: + description: |- + PublishableEndpoints lists the per-ordinal publishable P2P + hostnames stamped by the SND when `spec.networking.tcp` is set. + Each entry mirrors the value injected into the child SeiNode's + `spec.externalAddress` (hostname:port). Hostnames are deterministic + from the SND identity, so this slice is populated without reading + Service status back. + items: + description: |- + PublishableEndpoint is the observed state of one child's publishable + P2P endpoint. Stamped by the SND networking reconciler. + properties: + hostname: + description: |- + Hostname is the vanity host:port advertised to peers via + `p2p.external_address`. Equals the value injected into the + child SeiNode's `spec.externalAddress`. + type: string + ordinal: + description: |- + Ordinal is the child's replica index within the SND + (matches `sei.io/nodedeployment-ordinal`). + format: int32 + type: integer + seiNodeName: + description: |- + SeiNodeName is the child SeiNode resource name (also the + per-pod headless Service name). + type: string + required: + - hostname + - ordinal + - seiNodeName + type: object + type: array + x-kubernetes-list-map-keys: + - ordinal + x-kubernetes-list-type: map routes: description: Routes lists the HTTPRoute hostnames managed by this deployment. diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 81106584..0683bd2a 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -88,8 +88,14 @@ spec: type: object type: object chainId: - description: ChainID of the chain this node belongs to. + description: |- + ChainID of the chain this node belongs to. + Constrained to DNS-1123 label characters because the controller composes + it into publishable hostnames (e.g. `-p2p..`) when + the parent SND opts into TCP networking; the address is a one-way door + once peers cache it. minLength: 1 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?$ type: string dataVolume: description: |- @@ -127,6 +133,21 @@ spec: x-kubernetes-validations: - message: import cannot be unset once configured rule: (!has(oldSelf.import) || has(self.import)) + externalAddress: + description: |- + ExternalAddress is the routable P2P address — bare host:port, no + nodeId@ prefix — written into seid's `p2p.external_address`. Two + ownership models share this field: + + - SND-managed: when the parent SeiNodeDeployment has + `Spec.Networking.TCP` set, the SND reconciler injects this + value at child Create time (deterministic vanity hostname) and + re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on + the parent clears this field on every child. + - Standalone: a SeiNode created without an SND parent may set + this directly; the planner emits it verbatim. The controller + never overwrites a standalone SeiNode's value. + type: string fullNode: description: FullNode configures a chain-following full node (absorbs the "rpc" role). @@ -800,14 +821,6 @@ spec: Parent controllers compare this against spec.image to determine whether a spec change has been fully actuated. type: string - externalAddress: - description: |- - ExternalAddress is the routable P2P address for this node — bare - host:port, no nodeId@ prefix. Populated by the SeiNode controller - from the per-pod LoadBalancer Service when the parent SND has - Spec.Networking.TCP set; consumed by the planner to set - p2p.external_address in CometBFT config. - type: string phase: description: Phase is the high-level lifecycle state. enum: From 2c710915e7099ba9aa4676e90f4ce95e6485a2f9 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 28 May 2026 15:10:08 -0700 Subject: [PATCH 2/6] fix(publishable-p2p): separate SEI_PUBLISHABLE_DOMAIN from SEI_GATEWAY_PUBLIC_DOMAIN MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Platform review surfaced three blockers, all the same root cause: the mental model assumed the L7 gateway zone (SEI_GATEWAY_PUBLIC_DOMAIN) was reusable for L4 publishable hostnames, but the cluster reality is that harbor's external-dns watches `harbor.platform.sei.io` while its gateway uses `platform.sei.io`, and dev has no gateway public domain configured at all. Reusing the L7 zone would silently write CNAMEs into a zone the cluster's external-dns doesn't watch — peers would never resolve. Resolution: - New `SEI_PUBLISHABLE_DOMAIN` env (optional, no fallback). When unset, TCP-enabled SNDs surface a distinct condition reason. - New `ConditionNetworkingReady=False/PublishableDomainNotConfigured` reason — distinct from `VPCCIDRNotConfigured` so dev's empty-domain-but-cidr-set case doesn't mislabel as `InvalidChainIDForDNS` (chain ID is fine; domain is the issue). - `publishableHostname` switches to `r.PublishableDomain` from `r.GatewayPublicDomain`. L7 and L4 zones now diverge per cluster. Platform PR (separate, follow-up) sets per-cluster: - prod: SEI_PUBLISHABLE_DOMAIN=prod.platform.sei.io - harbor: SEI_PUBLISHABLE_DOMAIN=harbor.platform.sei.io - dev: unset → publishability inert with clear reason Tests: - All publishable_test.go cases retargeted to PublishableDomain field. - New envtest TestPublishableP2P_PublishableDomainNotConfigured_SurfacesCondition pins the distinct condition reason. - Existing envtest suite still green at 67s. Refs sei-protocol/sei-k8s-controller#360 Addresses platform-engineer review blockers on PR #365 Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/main.go | 14 ++++++ .../controller/nodedeployment/controller.go | 9 ++++ .../envtest/publishable_p2p_test.go | 47 +++++++++++++++++-- .../nodedeployment/envtest/suite_test.go | 1 + .../controller/nodedeployment/networking.go | 9 ++++ .../controller/nodedeployment/publishable.go | 4 +- .../nodedeployment/publishable_test.go | 22 ++++----- 7 files changed, 89 insertions(+), 17 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index 8a5c394f..f46239ec 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -235,6 +235,19 @@ func main() { setupLog.Info("Publishable-P2P capability enabled", "cidr", cidr.String()) } + // SEI_PUBLISHABLE_DOMAIN is the DNS zone for per-pod vanity hostnames. + // Distinct from SEI_GATEWAY_PUBLIC_DOMAIN because L7 and L4 zones may + // diverge per cluster: harbor's external-dns watches + // `harbor.platform.sei.io` while the gateway uses `platform.sei.io`. + // When unset, TCP-enabled SNDs surface + // `ConditionNetworkingReady=False/PublishableDomainNotConfigured`. + publishableDomain := os.Getenv("SEI_PUBLISHABLE_DOMAIN") + if publishableDomain == "" { + setupLog.Info("Publishable-P2P domain not configured: SEI_PUBLISHABLE_DOMAIN not set; TCP-enabled SNDs will surface PublishableDomainNotConfigured") + } else { + setupLog.Info("Publishable-P2P domain configured", "domain", publishableDomain) + } + //nolint:staticcheck // migrating to events.EventRecorder API is a separate effort recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") if err := (&nodedeploymentcontroller.SeiNodeDeploymentReconciler{ @@ -247,6 +260,7 @@ func main() { GatewayPublicDomain: platformCfg.GatewayPublicDomain, PublishabilityAvailable: publishabilityAvailable, PublishableVPCCIDR: publishableVPCCIDR, + PublishableDomain: publishableDomain, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { var assemblerNode *seiv1alpha1.SeiNode diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index a7d9bc30..226e0a60 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -58,6 +58,15 @@ type SeiNodeDeploymentReconciler struct { // range is outside this CIDR. PublishableVPCCIDR *net.IPNet + // PublishableDomain is the DNS zone for per-pod vanity hostnames, + // parsed from SEI_PUBLISHABLE_DOMAIN at startup. Distinct from + // GatewayPublicDomain because the L7 gateway zone and the L4 + // publishable zone may diverge per cluster (harbor's external-dns + // watches `harbor.platform.sei.io` while the gateway uses + // `platform.sei.io`). When empty, the publishable path is + // fail-closed with reason PublishableDomainNotConfigured. + PublishableDomain string + // PlanExecutor drives group-level task plans (e.g. genesis assembly). PlanExecutor planner.PlanExecutor[*seiv1alpha1.SeiNodeDeployment] } diff --git a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go b/internal/controller/nodedeployment/envtest/publishable_p2p_test.go index 29ff98a5..ee78e830 100644 --- a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go +++ b/internal/controller/nodedeployment/envtest/publishable_p2p_test.go @@ -21,7 +21,7 @@ import ( const ( // publishableTestDomain is the value the publishable tests pin onto - // `testSNDReconciler.GatewayPublicDomain` for the duration of each + // `testSNDReconciler.PublishableDomain` for the duration of each // test. The HTTP-route tests rely on the default empty value; // `withPublishabilityEnabled` restores both fields in t.Cleanup so // later tests see the unmodified suite state. @@ -47,12 +47,12 @@ func expectedPublishableAddr(sndName, chainID string, ordinal int) string { func withPublishabilityEnabled(t *testing.T, available bool) { t.Helper() prevAvail := testSNDReconciler.PublishabilityAvailable - prevDomain := testSNDReconciler.GatewayPublicDomain + prevDomain := testSNDReconciler.PublishableDomain testSNDReconciler.PublishabilityAvailable = available - testSNDReconciler.GatewayPublicDomain = publishableTestDomain + testSNDReconciler.PublishableDomain = publishableTestDomain t.Cleanup(func() { testSNDReconciler.PublishabilityAvailable = prevAvail - testSNDReconciler.GatewayPublicDomain = prevDomain + testSNDReconciler.PublishableDomain = prevDomain }) } @@ -468,3 +468,42 @@ func TestPublishableP2P_VPCCIDRNotConfigured_SurfacesConditionAndSkipsService(t "child ExternalAddress must be empty without capability") } } + +// TestPublishableP2P_PublishableDomainNotConfigured_SurfacesCondition is the +// L4-zone-not-configured branch. With `PublishabilityAvailable=true` (VPC +// CIDR set) but `PublishableDomain=""` (no SEI_PUBLISHABLE_DOMAIN), a +// TCP-enabled SND must surface `PublishableDomainNotConfigured` rather +// than getting silently misclassified as a VPC-CIDR or chain-ID problem. +// Real-world trigger: dev cluster, where SEI_PUBLISHABLE_DOMAIN is unset. +func TestPublishableP2P_PublishableDomainNotConfigured_SurfacesCondition(t *testing.T) { + g := NewWithT(t) + // Cap available, domain absent: pin both fields explicitly. + prevAvail := testSNDReconciler.PublishabilityAvailable + prevDomain := testSNDReconciler.PublishableDomain + testSNDReconciler.PublishabilityAvailable = true + testSNDReconciler.PublishableDomain = "" + t.Cleanup(func() { + testSNDReconciler.PublishabilityAvailable = prevAvail + testSNDReconciler.PublishableDomain = prevDomain + }) + + ns := makeNamespace(t) + snd := fixtures.NewSND(ns, "pub-no-domain", + fixtures.WithReplicas(1), + withTCP(), + ) + g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) + + waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { + c := apimeta.FindStatusCondition(latest.Status.Conditions, seiv1alpha1.ConditionNetworkingReady) + return c != nil && c.Status == metav1.ConditionFalse && c.Reason == "PublishableDomainNotConfigured" + }, "ConditionNetworkingReady=False/PublishableDomainNotConfigured") + + // No publishable Service. + svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} + g.Consistently(func() bool { + err := testCli.Get(testCtx, svcKey, &corev1.Service{}) + return apierrors.IsNotFound(err) + }, 2*time.Second, 200*time.Millisecond).Should(BeTrue(), + "no publishable Service created when domain is unconfigured") +} diff --git a/internal/controller/nodedeployment/envtest/suite_test.go b/internal/controller/nodedeployment/envtest/suite_test.go index 3b6e826d..7f7d0fa8 100644 --- a/internal/controller/nodedeployment/envtest/suite_test.go +++ b/internal/controller/nodedeployment/envtest/suite_test.go @@ -208,6 +208,7 @@ func run(m *testing.M) (int, error) { GatewayDomain: "test.local", GatewayPublicDomain: "", PublishabilityAvailable: true, + PublishableDomain: "", PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(_ context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index 8e3ee3e4..5e504869 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -111,6 +111,15 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g } return nil } + if r.PublishableDomain == "" { + setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, + "PublishableDomainNotConfigured", + "spec.networking.tcp requires SEI_PUBLISHABLE_DOMAIN to be set on the controller; publishable Services are not created") + if err := r.deletePublishableServices(ctx, group); err != nil { + return fmt.Errorf("deleting publishable services after capability loss: %w", err) + } + return nil + } if err := r.reconcilePublishableServices(ctx, group); err != nil { return fmt.Errorf("reconciling publishable services: %w", err) } diff --git a/internal/controller/nodedeployment/publishable.go b/internal/controller/nodedeployment/publishable.go index 96c4b8cf..f4beffe7 100644 --- a/internal/controller/nodedeployment/publishable.go +++ b/internal/controller/nodedeployment/publishable.go @@ -100,7 +100,7 @@ func effectiveChainID(group *seiv1alpha1.SeiNodeDeployment) string { // DNS-illegal characters that slipped past admission). Callers must // treat the empty string as "skip" — the apply boundary fails closed. func (r *SeiNodeDeploymentReconciler) publishableHostname(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { - if r.GatewayPublicDomain == "" { + if r.PublishableDomain == "" { return "" } chainID := effectiveChainID(group) @@ -117,7 +117,7 @@ func (r *SeiNodeDeploymentReconciler) publishableHostname(group *seiv1alpha1.Sei host := fmt.Sprintf("%s-p2p.%s.%s", seiNodeName(group, ordinal), chainID, - r.GatewayPublicDomain, + r.PublishableDomain, ) return host } diff --git a/internal/controller/nodedeployment/publishable_test.go b/internal/controller/nodedeployment/publishable_test.go index e65b4c2a..d6463d30 100644 --- a/internal/controller/nodedeployment/publishable_test.go +++ b/internal/controller/nodedeployment/publishable_test.go @@ -21,7 +21,7 @@ func TestPublishableHostname_Table(t *testing.T) { templateChainID string genesisChainID string ordinal int - gatewayPubDomain string + publishableDomain string want string }{ { @@ -30,7 +30,7 @@ func TestPublishableHostname_Table(t *testing.T) { namespace: "sei-test-1", templateChainID: "atlantic-2", ordinal: 0, - gatewayPubDomain: "prod.platform.sei.io", + publishableDomain: "prod.platform.sei.io", want: "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", }, { @@ -39,7 +39,7 @@ func TestPublishableHostname_Table(t *testing.T) { namespace: "pacific-1", templateChainID: "pacific-1", ordinal: 5, - gatewayPubDomain: "prod.platform.sei.io", + publishableDomain: "prod.platform.sei.io", want: "rpc-5-p2p.pacific-1.prod.platform.sei.io", }, { @@ -49,7 +49,7 @@ func TestPublishableHostname_Table(t *testing.T) { templateChainID: "", genesisChainID: "newchain-1", ordinal: 1, - gatewayPubDomain: "test.platform.sei.io", + publishableDomain: "test.platform.sei.io", want: "newchain-validators-1-p2p.newchain-1.test.platform.sei.io", }, { @@ -58,7 +58,7 @@ func TestPublishableHostname_Table(t *testing.T) { namespace: "sei-test-1", templateChainID: "atlantic-2", ordinal: 0, - gatewayPubDomain: "", + publishableDomain: "", want: "", }, { @@ -67,7 +67,7 @@ func TestPublishableHostname_Table(t *testing.T) { namespace: "sei-test-1", templateChainID: "", ordinal: 0, - gatewayPubDomain: "prod.platform.sei.io", + publishableDomain: "prod.platform.sei.io", want: "", }, { @@ -76,7 +76,7 @@ func TestPublishableHostname_Table(t *testing.T) { namespace: "sei-test-1", templateChainID: "Atlantic-2", ordinal: 0, - gatewayPubDomain: "prod.platform.sei.io", + publishableDomain: "prod.platform.sei.io", want: "", }, { @@ -85,7 +85,7 @@ func TestPublishableHostname_Table(t *testing.T) { namespace: "sei-test-1", templateChainID: "atlantic.2", ordinal: 0, - gatewayPubDomain: "prod.platform.sei.io", + publishableDomain: "prod.platform.sei.io", want: "", }, } @@ -104,7 +104,7 @@ func TestPublishableHostname_Table(t *testing.T) { if tc.genesisChainID != "" { snd.Spec.Genesis = &seiv1alpha1.GenesisCeremonyConfig{ChainID: tc.genesisChainID} } - r := &SeiNodeDeploymentReconciler{GatewayPublicDomain: tc.gatewayPubDomain} + r := &SeiNodeDeploymentReconciler{PublishableDomain:tc.publishableDomain} g.Expect(r.publishableHostname(snd, tc.ordinal)).To(Equal(tc.want)) }) } @@ -120,7 +120,7 @@ func TestPublishableExternalAddress_AppendsP2PPort(t *testing.T) { }, }, } - r := &SeiNodeDeploymentReconciler{GatewayPublicDomain: "prod.platform.sei.io"} + r := &SeiNodeDeploymentReconciler{PublishableDomain:"prod.platform.sei.io"} got := r.publishableExternalAddress(snd, 0) g.Expect(got).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) } @@ -130,7 +130,7 @@ func TestPublishableExternalAddress_EmptyWhenHostnameRejected(t *testing.T) { snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, } - r := &SeiNodeDeploymentReconciler{GatewayPublicDomain: "prod.platform.sei.io"} + r := &SeiNodeDeploymentReconciler{PublishableDomain:"prod.platform.sei.io"} g.Expect(r.publishableExternalAddress(snd, 0)).To(BeEmpty()) } From 53423de1b94f200729dcf2c64bea481550afe192 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 28 May 2026 16:44:44 -0700 Subject: [PATCH 3/6] refactor(publishable-p2p): drop VPC CIDR machinery, flatten ExternalAddress, sweep comments Per PR #365 review feedback. Three less-ambiguous wins applied; subjective direction (publishable.go vs external_address.go complexity trade) parked for follow-up. Dropped: - SEI_VPC_CIDR env parsing in main.go and the PublishabilityAvailable / PublishableVPCCIDR fields on the SND reconciler. - VPCCIDRNotConfigured + PodOutsideVPCCIDR condition reasons. - PublishableDomainNotConfigured condition reason (caller folded into one combined `TCPEnabled() && PublishableDomain != ""` gate). - Runtime DNS-1123 belt-and-suspenders in publishableHostname (the CRD Pattern on ChainID is the gate). - externalAddressEqual helper (was solving nil-vs-*"" for a field that shouldn't have been a pointer). Changed: - Spec.ExternalAddress flipped from *string to plain string. Planner read becomes plain `!= ""`; ensureSeiNode diff becomes plain `==`. Template guard becomes `spec.ExternalAddress = ""`. - orphanNetworkingResources now also drops the SND owner ref on publishable Services so DeletionPolicyRetain keeps them around. New orphanPublishableServices mirrors the existing HTTP-side path. Aggressive comment sweep across changed files. ~445 net line deletion. Tests pass: unit + envtest (65s). Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 18 +- api/v1alpha1/zz_generated.deepcopy.go | 5 - cmd/main.go | 35 --- config/crd/sei.io_seinodedeployments.yaml | 16 +- config/crd/sei.io_seinodes.yaml | 16 +- .../controller/nodedeployment/controller.go | 25 +- .../envtest/publishable_p2p_test.go | 199 ++++------------ .../nodedeployment/envtest/suite_test.go | 1 - .../controller/nodedeployment/networking.go | 27 +-- internal/controller/nodedeployment/nodes.go | 60 +---- .../controller/nodedeployment/publishable.go | 142 ++++------- .../nodedeployment/publishable_test.go | 220 +++++------------- internal/controller/nodedeployment/status.go | 4 +- internal/planner/common_overrides_test.go | 5 +- internal/planner/planner.go | 4 +- manifests/sei.io_seinodedeployments.yaml | 16 +- manifests/sei.io_seinodes.yaml | 16 +- 17 files changed, 182 insertions(+), 627 deletions(-) diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 8364cadf..9fe598dc 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -73,20 +73,12 @@ type SeiNodeSpec struct { // +optional Validator *ValidatorSpec `json:"validator,omitempty"` - // ExternalAddress is the routable P2P address — bare host:port, no - // nodeId@ prefix — written into seid's `p2p.external_address`. Two - // ownership models share this field: - // - // - SND-managed: when the parent SeiNodeDeployment has - // `Spec.Networking.TCP` set, the SND reconciler injects this - // value at child Create time (deterministic vanity hostname) and - // re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on - // the parent clears this field on every child. - // - Standalone: a SeiNode created without an SND parent may set - // this directly; the planner emits it verbatim. The controller - // never overwrites a standalone SeiNode's value. + // ExternalAddress is the routable P2P host:port written into seid's + // `p2p.external_address`. SND-managed nodes get this stamped by the + // SND reconciler when TCP networking is enabled. Standalone SeiNodes + // can set it directly. // +optional - ExternalAddress *string `json:"externalAddress,omitempty"` + ExternalAddress string `json:"externalAddress,omitempty"` // Paused freezes reconciliation. While true, the controller does not // advance the lifecycle, start plans, or mutate derived resources diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 6f46b691..0a9bf9ca 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -1047,11 +1047,6 @@ func (in *SeiNodeSpec) DeepCopyInto(out *SeiNodeSpec) { *out = new(ValidatorSpec) (*in).DeepCopyInto(*out) } - if in.ExternalAddress != nil { - in, out := &in.ExternalAddress, &out.ExternalAddress - *out = new(string) - **out = **in - } } // DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new SeiNodeSpec. diff --git a/cmd/main.go b/cmd/main.go index f46239ec..fd6ca324 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -5,7 +5,6 @@ import ( "crypto/tls" "flag" "fmt" - "net" "net/http" "os" "sort" @@ -214,39 +213,7 @@ func main() { os.Exit(1) } - // Publishable-P2P capability: SEI_VPC_CIDR is a deployment-time - // constant (a VPC CIDR rarely changes within a cluster's life). - // Parse once at startup. When unset or unparseable, the SND - // reconciler still runs — TCP-enabled SNDs surface - // `ConditionNetworkingReady=False/VPCCIDRNotConfigured` on each - // reconcile and no LB Services are created. This is fail-closed at - // the apply boundary, not at admission; an admission webhook is - // deferred until we accumulate more SND-side validation needs. - vpcCIDRRaw := os.Getenv("SEI_VPC_CIDR") - publishabilityAvailable := false - var publishableVPCCIDR *net.IPNet - if vpcCIDRRaw == "" { - setupLog.Info("Publishable-P2P capability disabled: SEI_VPC_CIDR not set") - } else if _, cidr, err := net.ParseCIDR(vpcCIDRRaw); err != nil { - setupLog.Error(err, "Publishable-P2P capability disabled: SEI_VPC_CIDR is unparseable", "value", vpcCIDRRaw) - } else { - publishabilityAvailable = true - publishableVPCCIDR = cidr - setupLog.Info("Publishable-P2P capability enabled", "cidr", cidr.String()) - } - - // SEI_PUBLISHABLE_DOMAIN is the DNS zone for per-pod vanity hostnames. - // Distinct from SEI_GATEWAY_PUBLIC_DOMAIN because L7 and L4 zones may - // diverge per cluster: harbor's external-dns watches - // `harbor.platform.sei.io` while the gateway uses `platform.sei.io`. - // When unset, TCP-enabled SNDs surface - // `ConditionNetworkingReady=False/PublishableDomainNotConfigured`. publishableDomain := os.Getenv("SEI_PUBLISHABLE_DOMAIN") - if publishableDomain == "" { - setupLog.Info("Publishable-P2P domain not configured: SEI_PUBLISHABLE_DOMAIN not set; TCP-enabled SNDs will surface PublishableDomainNotConfigured") - } else { - setupLog.Info("Publishable-P2P domain configured", "domain", publishableDomain) - } //nolint:staticcheck // migrating to events.EventRecorder API is a separate effort recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") @@ -258,8 +225,6 @@ func main() { GatewayNamespace: platformCfg.GatewayNamespace, GatewayDomain: platformCfg.GatewayDomain, GatewayPublicDomain: platformCfg.GatewayPublicDomain, - PublishabilityAvailable: publishabilityAvailable, - PublishableVPCCIDR: publishableVPCCIDR, PublishableDomain: publishableDomain, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 7aab588b..75167670 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -274,18 +274,10 @@ spec: rule: (!has(oldSelf.import) || has(self.import)) externalAddress: description: |- - ExternalAddress is the routable P2P address — bare host:port, no - nodeId@ prefix — written into seid's `p2p.external_address`. Two - ownership models share this field: - - - SND-managed: when the parent SeiNodeDeployment has - `Spec.Networking.TCP` set, the SND reconciler injects this - value at child Create time (deterministic vanity hostname) and - re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on - the parent clears this field on every child. - - Standalone: a SeiNode created without an SND parent may set - this directly; the planner emits it verbatim. The controller - never overwrites a standalone SeiNode's value. + ExternalAddress is the routable P2P host:port written into seid's + `p2p.external_address`. SND-managed nodes get this stamped by the + SND reconciler when TCP networking is enabled. Standalone SeiNodes + can set it directly. type: string fullNode: description: FullNode configures a chain-following full node diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 0683bd2a..268716a0 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -135,18 +135,10 @@ spec: rule: (!has(oldSelf.import) || has(self.import)) externalAddress: description: |- - ExternalAddress is the routable P2P address — bare host:port, no - nodeId@ prefix — written into seid's `p2p.external_address`. Two - ownership models share this field: - - - SND-managed: when the parent SeiNodeDeployment has - `Spec.Networking.TCP` set, the SND reconciler injects this - value at child Create time (deterministic vanity hostname) and - re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on - the parent clears this field on every child. - - Standalone: a SeiNode created without an SND parent may set - this directly; the planner emits it verbatim. The controller - never overwrites a standalone SeiNode's value. + ExternalAddress is the routable P2P host:port written into seid's + `p2p.external_address`. SND-managed nodes get this stamped by the + SND reconciler when TCP networking is enabled. Standalone SeiNodes + can set it directly. type: string fullNode: description: FullNode configures a chain-following full node (absorbs diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index 226e0a60..ea543eb1 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -3,7 +3,6 @@ package nodedeployment import ( "context" "fmt" - "net" "time" corev1 "k8s.io/api/core/v1" @@ -43,28 +42,8 @@ type SeiNodeDeploymentReconciler struct { GatewayDomain string GatewayPublicDomain string - // PublishabilityAvailable is the cluster-level capability flag for the - // publishable-P2P path. Set to true at startup only when SEI_VPC_CIDR - // parses cleanly; gates Service apply in the TCP networking branch. - // When false, an SND with `Spec.Networking.TCP` set surfaces - // `ConditionNetworkingReady=False/VPCCIDRNotConfigured` and no LB - // Services are created. - PublishabilityAvailable bool - - // PublishableVPCCIDR is the cluster's VPC CIDR, parsed from - // SEI_VPC_CIDR at startup. Held for future hooks; the reconcile path - // does not check Pod IPs against it — AWS LBC surfaces target - // registration failures via Service events if the cluster's Pod IP - // range is outside this CIDR. - PublishableVPCCIDR *net.IPNet - - // PublishableDomain is the DNS zone for per-pod vanity hostnames, - // parsed from SEI_PUBLISHABLE_DOMAIN at startup. Distinct from - // GatewayPublicDomain because the L7 gateway zone and the L4 - // publishable zone may diverge per cluster (harbor's external-dns - // watches `harbor.platform.sei.io` while the gateway uses - // `platform.sei.io`). When empty, the publishable path is - // fail-closed with reason PublishableDomainNotConfigured. + // PublishableDomain is the DNS zone for per-pod publishable hostnames, + // from SEI_PUBLISHABLE_DOMAIN. Empty disables the publishable path. PublishableDomain string // PlanExecutor drives group-level task plans (e.g. genesis assembly). diff --git a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go b/internal/controller/nodedeployment/envtest/publishable_p2p_test.go index ee78e830..76d6163d 100644 --- a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go +++ b/internal/controller/nodedeployment/envtest/publishable_p2p_test.go @@ -10,7 +10,6 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" - apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -19,19 +18,8 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/controller/nodedeployment/envtest/fixtures" ) -const ( - // publishableTestDomain is the value the publishable tests pin onto - // `testSNDReconciler.PublishableDomain` for the duration of each - // test. The HTTP-route tests rely on the default empty value; - // `withPublishabilityEnabled` restores both fields in t.Cleanup so - // later tests see the unmodified suite state. - publishableTestDomain = "test.platform.sei.io" -) +const publishableTestDomain = "test.platform.sei.io" -// expectedPublishableHost composes the deterministic vanity host the -// SND should stamp for a given SND name + child ordinal. Mirrors the -// production template (`-p2p..`) -// so the tests fail loudly if either side drifts. func expectedPublishableHost(sndName, chainID string, ordinal int) string { return fmt.Sprintf("%s-%d-p2p.%s.%s", sndName, ordinal, chainID, publishableTestDomain) } @@ -40,20 +28,13 @@ func expectedPublishableAddr(sndName, chainID string, ordinal int) string { return expectedPublishableHost(sndName, chainID, ordinal) + ":26656" } -// withPublishabilityEnabled stamps the publishable test fixtures onto -// the shared SND reconciler and restores the original values when the -// test finishes. Tests that need a different capability state (e.g. -// VPCCIDRNotConfigured) call this with available=false. -func withPublishabilityEnabled(t *testing.T, available bool) { +// withPublishableDomain sets the test domain on the shared reconciler and +// restores it after the test. +func withPublishableDomain(t *testing.T, domain string) { t.Helper() - prevAvail := testSNDReconciler.PublishabilityAvailable - prevDomain := testSNDReconciler.PublishableDomain - testSNDReconciler.PublishabilityAvailable = available - testSNDReconciler.PublishableDomain = publishableTestDomain - t.Cleanup(func() { - testSNDReconciler.PublishabilityAvailable = prevAvail - testSNDReconciler.PublishableDomain = prevDomain - }) + prev := testSNDReconciler.PublishableDomain + testSNDReconciler.PublishableDomain = domain + t.Cleanup(func() { testSNDReconciler.PublishableDomain = prev }) } // withTCP enables `spec.networking.tcp` on a fixtures-built SND. Goes @@ -68,14 +49,11 @@ func withTCP() fixtures.Option { } } -// TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists is -// the happy path. An SND with `networking.tcp` set should produce: a -// child SeiNode whose `Spec.ExternalAddress` matches the deterministic -// vanity host, a per-pod LB Service with the right annotations, and an -// entry in `Status.NetworkingStatus.PublishableEndpoints`. +// Happy path: TCP-enabled SND produces a child with Spec.ExternalAddress, +// a per-pod LB Service, and a PublishableEndpoints entry. func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, true) + withPublishableDomain(t, publishableTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-create", @@ -97,7 +75,7 @@ func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + return child.Spec.ExternalAddress == wantAddr }, "child SeiNode "+childKey.Name+" populated with publishable ExternalAddress") // 2. Per-pod LB Service exists with the expected annotations. @@ -142,14 +120,10 @@ func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing }, "PublishableEndpoints stamped with deterministic vanity host") } -// TestPublishableP2P_OptOut_ClearsAddressAndDeletesService asserts the -// opt-out lifecycle: removing TCP from `spec.networking` clears the -// child's `Spec.ExternalAddress` and deletes the per-pod LB Service. -// ensureSeiNode is the single write site for the child field, so this -// validates the diff propagation path end-to-end. +// Opt-out: removing TCP clears Spec.ExternalAddress and deletes the Service. func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, true) + withPublishableDomain(t, publishableTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-optout", @@ -169,7 +143,7 @@ func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + return child.Spec.ExternalAddress == wantAddr }, "child has publishable address before opt-out") waitFor(t, func() bool { return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil @@ -191,7 +165,7 @@ func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress == nil || *child.Spec.ExternalAddress == "" + return child.Spec.ExternalAddress == "" }, "child ExternalAddress cleared after opt-out") // Per-pod LB Service is deleted. @@ -207,13 +181,10 @@ func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { }, "PublishableEndpoints cleared after opt-out") } -// TestPublishableP2P_ReOptIn_RestoresAddressAndService is the dual of -// the opt-out test: re-adding TCP after a clear should bring the child -// back to the same vanity hostname (deterministic) and recreate the -// per-pod LB Service. +// Re-opt-in: TCP restored brings the address back and recreates the Service. func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, true) + withPublishableDomain(t, publishableTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-reoptin", @@ -232,7 +203,7 @@ func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + return child.Spec.ExternalAddress == wantAddr }, "initial converge") // Opt out. @@ -257,7 +228,7 @@ func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + return child.Spec.ExternalAddress == wantAddr }, "child re-stamped with same vanity address after re-opt-in") // Service is recreated. @@ -266,17 +237,10 @@ func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { }, "publishable Service recreated after re-opt-in") } -// TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild guards -// the invariant called out in the brief: on scale-down the ordinal's -// publishable Service must be deleted before the child SeiNode so the -// NLB never sits in the no-healthy-targets-but-still-allocated state. -// -// envtest is happy-path here — both deletions are issued by the same -// reconcile pass, so we mainly assert both objects converge to gone. -// The ordering predicate runs inside `scaleDown`. +// Scale-down deletes the ordinal's Service before the child SeiNode. func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, true) + withPublishableDomain(t, publishableTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-scale", @@ -313,16 +277,10 @@ func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) }, 2*time.Second, 200*time.Millisecond).Should(Succeed()) } -// TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress asserts the -// brief's "no SND involvement" contract. A SeiNode created directly by -// a user with `Spec.ExternalAddress` set should keep that value through -// reconcile — no SND-side code path should clear it, because no SND -// owns it. The planner emits it verbatim. The check is interface-only -// (we read back the spec); the planner emit-side is covered by the -// unit test in `internal/planner/common_overrides_test.go`. +// Standalone SeiNodes own Spec.ExternalAddress directly; no SND touches it. func TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, true) + withPublishableDomain(t, publishableTestDomain) ns := makeNamespace(t) addr := "custom.example.com:26656" @@ -334,34 +292,25 @@ func TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress(t *testing.T) { Spec: seiv1alpha1.SeiNodeSpec{ ChainID: "pacific-1", Image: fixtures.DefaultImage, - ExternalAddress: &addr, + ExternalAddress: addr, FullNode: &seiv1alpha1.FullNodeSpec{}, }, } g.Expect(testCli.Create(testCtx, node)).To(Succeed()) - // Read back: the spec persists. No SND code path should touch this. - g.Consistently(func() *string { + g.Consistently(func() string { fetched := &seiv1alpha1.SeiNode{} if err := testCli.Get(testCtx, client.ObjectKeyFromObject(node), fetched); err != nil { - return nil + return "" } return fetched.Spec.ExternalAddress - }, 2*time.Second, 200*time.Millisecond).Should(And( - Not(BeNil()), - HaveValue(Equal(addr)), - )) + }, 2*time.Second, 200*time.Millisecond).Should(Equal(addr)) } -// TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode -// exercises the brief's stomp scenario: an external write clears the -// child's `Spec.ExternalAddress` mid-flight. ensureSeiNode must detect -// the diff on next reconcile and restore the SND-managed value. This -// is the load-bearing claim that the field truly converges through -// the single write site. +// kubectl-edit stomp: external clear of Spec.ExternalAddress reconverges via ensureSeiNode. func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, true) + withPublishableDomain(t, publishableTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-stomp", @@ -379,7 +328,7 @@ func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing. if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + return child.Spec.ExternalAddress == wantAddr }, "initial converge before stomp") // Simulate `kubectl edit seinode` clearing the field. The SND @@ -393,7 +342,7 @@ func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing. return err } patch := client.MergeFrom(child.DeepCopy()) - child.Spec.ExternalAddress = nil + child.Spec.ExternalAddress = "" return testCli.Patch(testCtx, child, patch) }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) @@ -416,94 +365,36 @@ func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing. if err := testCli.Get(testCtx, childKey, child); err != nil { return false } - return child.Spec.ExternalAddress != nil && *child.Spec.ExternalAddress == wantAddr + return child.Spec.ExternalAddress == wantAddr }, "child reconverged via ensureSeiNode after external stomp") } -// TestPublishableP2P_VPCCIDRNotConfigured_SurfacesConditionAndSkipsService -// is the capability-disabled branch. With `PublishabilityAvailable=false` -// and a TCP-enabled SND, the reconciler must: -// - set ConditionNetworkingReady=False/VPCCIDRNotConfigured -// - not create a publishable Service -// - leave the child's Spec.ExternalAddress nil/empty -// -// This is the equivalent of an operator misconfigured the controller -// (SEI_VPC_CIDR missing). The condition is the operator's debugging -// signal. -func TestPublishableP2P_VPCCIDRNotConfigured_SurfacesConditionAndSkipsService(t *testing.T) { +// TestPublishableP2P_NoDomainConfigured_SkipsServices verifies the silent +// no-op path: when PublishableDomain is unset, TCP-enabled SNDs get no +// publishable Service and no child ExternalAddress. +func TestPublishableP2P_NoDomainConfigured_SkipsServices(t *testing.T) { g := NewWithT(t) - withPublishabilityEnabled(t, false) // capability OFF for this test + withPublishableDomain(t, "") ns := makeNamespace(t) - snd := fixtures.NewSND(ns, "pub-no-cidr", + snd := fixtures.NewSND(ns, "pub-no-domain", fixtures.WithReplicas(1), withTCP(), ) g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) - // Condition surfaces the misconfiguration. - waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { - c := apimeta.FindStatusCondition(latest.Status.Conditions, seiv1alpha1.ConditionNetworkingReady) - return c != nil && c.Status == metav1.ConditionFalse && c.Reason == "VPCCIDRNotConfigured" - }, "ConditionNetworkingReady=False/VPCCIDRNotConfigured") - - // No publishable Service. svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} g.Consistently(func() bool { err := testCli.Get(testCtx, svcKey, &corev1.Service{}) return apierrors.IsNotFound(err) - }, 2*time.Second, 200*time.Millisecond).Should(BeTrue(), - "no publishable Service created when capability is unavailable") + }, 2*time.Second, 200*time.Millisecond).Should(BeTrue()) - // Child SeiNode has no ExternalAddress. childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} - g.Eventually(func() error { + g.Eventually(func() string { child := &seiv1alpha1.SeiNode{} - return testCli.Get(testCtx, childKey, child) - }, 5*time.Second, 200*time.Millisecond).Should(Succeed()) - child := &seiv1alpha1.SeiNode{} - g.Expect(testCli.Get(testCtx, childKey, child)).To(Succeed()) - if child.Spec.ExternalAddress != nil { - g.Expect(*child.Spec.ExternalAddress).To(BeEmpty(), - "child ExternalAddress must be empty without capability") - } -} - -// TestPublishableP2P_PublishableDomainNotConfigured_SurfacesCondition is the -// L4-zone-not-configured branch. With `PublishabilityAvailable=true` (VPC -// CIDR set) but `PublishableDomain=""` (no SEI_PUBLISHABLE_DOMAIN), a -// TCP-enabled SND must surface `PublishableDomainNotConfigured` rather -// than getting silently misclassified as a VPC-CIDR or chain-ID problem. -// Real-world trigger: dev cluster, where SEI_PUBLISHABLE_DOMAIN is unset. -func TestPublishableP2P_PublishableDomainNotConfigured_SurfacesCondition(t *testing.T) { - g := NewWithT(t) - // Cap available, domain absent: pin both fields explicitly. - prevAvail := testSNDReconciler.PublishabilityAvailable - prevDomain := testSNDReconciler.PublishableDomain - testSNDReconciler.PublishabilityAvailable = true - testSNDReconciler.PublishableDomain = "" - t.Cleanup(func() { - testSNDReconciler.PublishabilityAvailable = prevAvail - testSNDReconciler.PublishableDomain = prevDomain - }) - - ns := makeNamespace(t) - snd := fixtures.NewSND(ns, "pub-no-domain", - fixtures.WithReplicas(1), - withTCP(), - ) - g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) - - waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { - c := apimeta.FindStatusCondition(latest.Status.Conditions, seiv1alpha1.ConditionNetworkingReady) - return c != nil && c.Status == metav1.ConditionFalse && c.Reason == "PublishableDomainNotConfigured" - }, "ConditionNetworkingReady=False/PublishableDomainNotConfigured") - - // No publishable Service. - svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} - g.Consistently(func() bool { - err := testCli.Get(testCtx, svcKey, &corev1.Service{}) - return apierrors.IsNotFound(err) - }, 2*time.Second, 200*time.Millisecond).Should(BeTrue(), - "no publishable Service created when domain is unconfigured") + if err := testCli.Get(testCtx, childKey, child); err != nil { + return "ERR" + } + return child.Spec.ExternalAddress + }, 5*time.Second, 200*time.Millisecond).Should(BeEmpty()) } diff --git a/internal/controller/nodedeployment/envtest/suite_test.go b/internal/controller/nodedeployment/envtest/suite_test.go index 7f7d0fa8..ffe97fbb 100644 --- a/internal/controller/nodedeployment/envtest/suite_test.go +++ b/internal/controller/nodedeployment/envtest/suite_test.go @@ -207,7 +207,6 @@ func run(m *testing.M) (int, error) { GatewayNamespace: "gateway", GatewayDomain: "test.local", GatewayPublicDomain: "", - PublishabilityAvailable: true, PublishableDomain: "", PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(_ context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index 5e504869..be316b3b 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -98,28 +98,7 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g } } - if group.Spec.Networking.TCPEnabled() { - if !r.PublishabilityAvailable { - setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, - "VPCCIDRNotConfigured", - "spec.networking.tcp requires SEI_VPC_CIDR to be set on the controller; publishable Services are not created") - // Defensive: clear any stale Services left from a prior boot - // where the env was set. Cheap list-and-delete; no-op when - // nothing is there. - if err := r.deletePublishableServices(ctx, group); err != nil { - return fmt.Errorf("deleting publishable services after capability loss: %w", err) - } - return nil - } - if r.PublishableDomain == "" { - setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, - "PublishableDomainNotConfigured", - "spec.networking.tcp requires SEI_PUBLISHABLE_DOMAIN to be set on the controller; publishable Services are not created") - if err := r.deletePublishableServices(ctx, group); err != nil { - return fmt.Errorf("deleting publishable services after capability loss: %w", err) - } - return nil - } + if group.Spec.Networking.TCPEnabled() && r.PublishableDomain != "" { if err := r.reconcilePublishableServices(ctx, group); err != nil { return fmt.Errorf("reconciling publishable services: %w", err) } @@ -454,6 +433,10 @@ func (r *SeiNodeDeploymentReconciler) orphanNetworkingResources(ctx context.Cont } } + if err := r.orphanPublishableServices(ctx, group); err != nil { + return err + } + return nil } diff --git a/internal/controller/nodedeployment/nodes.go b/internal/controller/nodedeployment/nodes.go index f45b68d8..c00b9a79 100644 --- a/internal/controller/nodedeployment/nodes.go +++ b/internal/controller/nodedeployment/nodes.go @@ -175,13 +175,7 @@ func (r *SeiNodeDeploymentReconciler) populateIncumbentNodes(ctx context.Context func (r *SeiNodeDeploymentReconciler) ensureSeiNode(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { desired := generateSeiNode(group, ordinal) - // Inject the SND-managed external address after the pure-template - // portion is built. Kept out of `generateSeiNode` so the rest of - // the package (and the existing test surface) treats the function - // as data-only. - if addr := r.publishableExternalAddressForChild(group, ordinal); addr != "" { - desired.Spec.ExternalAddress = &addr - } + desired.Spec.ExternalAddress = r.publishableExternalAddressForChild(group, ordinal) if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { return fmt.Errorf("setting owner reference: %w", err) } @@ -225,12 +219,7 @@ func (r *SeiNodeDeploymentReconciler) ensureSeiNode(ctx context.Context, group * existing.Spec.PodLabels = desired.Spec.PodLabels updated = true } - // ExternalAddress is SND-managed when TCP is on, cleared when TCP is - // off, and untouched when the parent does not opt into networking. The - // diff here is the single write site that propagates the field — - // generateSeiNode handles creation, this handles updates and external - // stomps (kubectl edit, manual patches). - if !externalAddressEqual(existing.Spec.ExternalAddress, desired.Spec.ExternalAddress) { + if existing.Spec.ExternalAddress != desired.Spec.ExternalAddress { existing.Spec.ExternalAddress = desired.Spec.ExternalAddress updated = true } @@ -240,44 +229,17 @@ func (r *SeiNodeDeploymentReconciler) ensureSeiNode(ctx context.Context, group * return nil } -// externalAddressEqual treats nil and a pointer to the empty string as -// equivalent — both represent "no override." Without this normalization -// the diff in ensureSeiNode would oscillate between nil and *"" each -// reconcile when a child is created via generateSeiNode (which sets nil) -// and then read back from the API (which may also store nil). -func externalAddressEqual(a, b *string) bool { - av, bv := "", "" - if a != nil { - av = *a - } - if b != nil { - bv = *b - } - return av == bv -} - // generateSeiNode produces the desired child SeiNode for a given ordinal. // Pure: depends only on the SND spec and ordinal. The publishable -// external-address injection happens in `ensureSeiNode` via the -// reconciler-bound helper because it needs `r.GatewayPublicDomain` and -// `r.PublishabilityAvailable` — controller-state, not template-data. -// -// Template-aliasing guard: the deep-copied template's -// `Spec.ExternalAddress` is zeroed before any per-ordinal stamping. If -// we did not, a user who set `spec.template.spec.externalAddress: foo` -// on the SND would cause every child to be created with the same -// literal value, which would either silently break peer routing or — -// worse — succeed long enough to pollute peer caches before the -// SND-side reconciler corrected it. Standalone SeiNodes (no SND parent) -// still control the field directly; this guard only runs on the SND -// code path. +// external address is injected by ensureSeiNode, which needs reconciler +// state. The template's ExternalAddress is dropped to prevent every +// child from sharing the same value if a user set it on the template. func generateSeiNode(group *seiv1alpha1.SeiNodeDeployment, ordinal int) *seiv1alpha1.SeiNode { labels := seiNodeLabels(group, ordinal) annotations := seiNodeAnnotations(group) spec := group.Spec.Template.Spec.DeepCopy() - // Template-aliasing guard — see function doc. - spec.ExternalAddress = nil + spec.ExternalAddress = "" podLabels := make(map[string]string) if group.Spec.Template.Metadata != nil { @@ -311,18 +273,12 @@ func generateSeiNode(group *seiv1alpha1.SeiNodeDeployment, ordinal int) *seiv1al } } -// publishableExternalAddressForChild returns the deterministic vanity -// address (host:port) when the SND opts into TCP networking and the -// controller has the capability flag. Returns "" otherwise. ensureSeiNode -// uses this to inject `Spec.ExternalAddress` on create and to drive the -// diff on update — single write site for the SND-managed value. +// publishableExternalAddressForChild returns the vanity host:port when the +// SND opts into TCP networking, "" otherwise. func (r *SeiNodeDeploymentReconciler) publishableExternalAddressForChild(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { if !group.Spec.Networking.TCPEnabled() { return "" } - if !r.PublishabilityAvailable { - return "" - } return r.publishableExternalAddress(group, ordinal) } diff --git a/internal/controller/nodedeployment/publishable.go b/internal/controller/nodedeployment/publishable.go index f4beffe7..d1077928 100644 --- a/internal/controller/nodedeployment/publishable.go +++ b/internal/controller/nodedeployment/publishable.go @@ -1,27 +1,7 @@ -// Package nodedeployment — publishable.go is the SND-side networking sub-reconciler -// for per-pod P2P LoadBalancer Services. Owns the deterministic vanity-hostname -// derivation, the Service apply/delete lifecycle, and the population of -// `Status.NetworkingStatus.PublishableEndpoints`. The child SeiNode reconciler -// never reads from this file — it observes `Spec.ExternalAddress` and proceeds. -// -// Lifecycle invariants: -// -// - Hostnames are derived purely from SND identity (`Name`, `ChainID`, the -// controller's `GatewayPublicDomain`). No Service status read-back, no AWS -// NLB hostname plumbed into the child. -// - external-dns watches `Service` resources with the -// `external-dns.alpha.kubernetes.io/hostname` annotation and provisions a -// CNAME from the vanity name to the NLB's allocated DNS. The pod itself -// advertises the vanity name to peers via `p2p.external_address`; peers -// resolve the CNAME at dial time. We never block STS creation on this. -// - Owner ref is the SND, not the child. On scale-down we explicitly delete -// the per-ordinal Service before deleting the child SeiNode; on full SND -// deletion the owner-ref cascade handles cleanup. -// - When the cluster lacks a parseable `SEI_VPC_CIDR`, this entire sub-path -// is bypassed and `ConditionNetworkingReady = False / VPCCIDRNotConfigured`. -// The controller never per-reconcile-introspects Pod IPs — misconfiguration -// surfaces as NLB target-registration failure events on the LB Service, -// which is the AWS-side ground truth. +// Package nodedeployment — publishable.go is the SND-side sub-reconciler +// for per-pod P2P LoadBalancer Services. Hostnames are derived from SND +// identity; external-dns creates the CNAME to the NLB. The child SeiNode +// reconciler is unaware and just reads Spec.ExternalAddress. package nodedeployment import ( @@ -35,7 +15,6 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "k8s.io/apimachinery/pkg/util/intstr" - "k8s.io/apimachinery/pkg/util/validation" ctrl "sigs.k8s.io/controller-runtime" "sigs.k8s.io/controller-runtime/pkg/client" @@ -44,25 +23,13 @@ import ( ) const ( - // publishableFieldOwner isolates the publishable Service apply path from - // the rest of the SND's SSA writes so the two paths cannot fight each other - // over field ownership. publishableFieldOwner = client.FieldOwner("nodedeployment-controller-publishable") - // publishableComponentLabel marks Services owned by this sub-reconciler so - // list-based cleanup can target them without entangling the HTTP path's - // external Service. publishableComponentLabel = "sei.io/component" publishableComponentValue = "p2p-lb" - // externalDNSHostnameAnnotation is the external-dns annotation that drives - // CNAME creation from the vanity hostname to the NLB's allocated DNS name. - // Pinned key — changing it would orphan in-cluster DNS records. externalDNSHostnameAnnotation = "external-dns.alpha.kubernetes.io/hostname" - // AWS Load Balancer Controller annotations for an internet-facing NLB - // with IP targets and cross-zone load balancing. Hardcoded for the - // single use case in scope; revisit if per-cluster knobs land. awsLBTypeAnnotation = "service.beta.kubernetes.io/aws-load-balancer-type" awsLBSchemeAnnotation = "service.beta.kubernetes.io/aws-load-balancer-scheme" awsLBTargetTypeAnnotation = "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type" @@ -72,18 +39,11 @@ const ( awsLBTargetTypeValue = "ip" awsLBCrossZoneAttributeStr = "load_balancing.cross_zone.enabled=true" - // publishableServiceSuffix is appended to the child SeiNode name to form - // the per-pod LB Service name. Stable identifier — operators key dashboards - // and runbooks off it. publishableServiceSuffix = "-p2p" ) -// effectiveChainID returns the chain ID the SND will stamp into child -// SeiNodes. Mirrors the precedence used by `generateSeiNode`: the -// template's explicit value wins, otherwise fall back to the Genesis -// ceremony's ChainID. Validators always set template ChainID via the -// ceremony override at child create, but a non-validator SND has no -// Genesis block and relies entirely on the template value. +// effectiveChainID returns the chain ID for child SeiNodes, mirroring +// generateSeiNode: template wins, Genesis is the validator fallback. func effectiveChainID(group *seiv1alpha1.SeiNodeDeployment) string { if id := group.Spec.Template.Spec.ChainID; id != "" { return id @@ -94,11 +54,8 @@ func effectiveChainID(group *seiv1alpha1.SeiNodeDeployment) string { return "" } -// publishableHostname is the deterministic vanity hostname for the given -// ordinal. Returns "" if the inputs cannot produce a valid DNS-1123 -// hostname (missing chain ID, missing public domain, or chain ID with -// DNS-illegal characters that slipped past admission). Callers must -// treat the empty string as "skip" — the apply boundary fails closed. +// publishableHostname returns "-p2p.." or "" when +// the publishable path is not configured. func (r *SeiNodeDeploymentReconciler) publishableHostname(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { if r.PublishableDomain == "" { return "" @@ -107,24 +64,11 @@ func (r *SeiNodeDeploymentReconciler) publishableHostname(group *seiv1alpha1.Sei if chainID == "" { return "" } - // Belt-and-suspenders: the API patterns reject DNS-illegal IDs at - // admission, but admission rules can be bypassed (cluster CRD edits, - // older CRD schema in-cluster). Validate the chain ID's label shape - // before composing it into a host. - if errs := validation.IsDNS1123Label(chainID); len(errs) > 0 { - return "" - } - host := fmt.Sprintf("%s-p2p.%s.%s", - seiNodeName(group, ordinal), - chainID, - r.PublishableDomain, - ) - return host + return fmt.Sprintf("%s-p2p.%s.%s", seiNodeName(group, ordinal), chainID, r.PublishableDomain) } -// publishableExternalAddress returns ":" suitable for -// CometBFT's `p2p.external_address`. Empty when the hostname cannot be -// derived (see publishableHostname). +// publishableExternalAddress returns ":" or "" when the +// hostname is unavailable. func (r *SeiNodeDeploymentReconciler) publishableExternalAddress(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { host := r.publishableHostname(group, ordinal) if host == "" { @@ -133,35 +77,21 @@ func (r *SeiNodeDeploymentReconciler) publishableExternalAddress(group *seiv1alp return fmt.Sprintf("%s:%d", host, seiconfig.PortP2P) } -// publishableServiceName is the per-pod LB Service name. Stable across -// SND edits — only the ordinal can shift it. func publishableServiceName(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { return seiNodeName(group, ordinal) + publishableServiceSuffix } -// reconcilePublishableServices server-side-applies one LoadBalancer -// Service per desired ordinal and trims any orphan Services left over -// from scale-down events the controller missed. +// reconcilePublishableServices server-side-applies one LoadBalancer Service +// per desired ordinal and trims orphans. func (r *SeiNodeDeploymentReconciler) reconcilePublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { desiredNames := make(map[string]struct{}, group.Spec.Replicas) for i := range int(group.Spec.Replicas) { - host := r.publishableHostname(group, i) - if host == "" { - // Capability gating happens upstream in reconcileNetworking; - // the only way to reach this branch is a chain ID that - // somehow escaped admission validation. Fail closed: set the - // stable False reason, do not apply. - setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, - "InvalidChainIDForDNS", - fmt.Sprintf("chainID %q is not a valid DNS-1123 label; cannot compose publishable hostname", effectiveChainID(group))) - return nil - } - desired := generatePublishableService(group, i, host) + desired := generatePublishableService(group, i, r.publishableHostname(group, i)) desired.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Service")) if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { return fmt.Errorf("setting owner reference on publishable Service %s: %w", desired.Name, err) } - //nolint:staticcheck // SSA apply via untyped object mirrors the rest of this controller; typed ApplyConfiguration is a separate effort + //nolint:staticcheck // SSA apply via untyped object mirrors the rest of this controller if err := r.Patch(ctx, desired, client.Apply, publishableFieldOwner, client.ForceOwnership); err != nil { return fmt.Errorf("applying publishable Service %s: %w", desired.Name, err) } @@ -178,9 +108,8 @@ func (r *SeiNodeDeploymentReconciler) reconcilePublishableServices(ctx context.C return nil } -// deleteOrphanPublishableServices removes publishable Services labelled -// for this SND whose names are not in desiredNames. Used after a -// scale-down or replica reset to clear stranded LB Services. +// deleteOrphanPublishableServices removes publishable Services owned by the +// SND whose names are not in desiredNames. Pass nil to delete all. func (r *SeiNodeDeploymentReconciler) deleteOrphanPublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, desiredNames map[string]struct{}) error { list := &corev1.ServiceList{} if err := r.List(ctx, list, @@ -207,16 +136,33 @@ func (r *SeiNodeDeploymentReconciler) deleteOrphanPublishableServices(ctx contex return nil } -// deletePublishableServices removes every publishable Service owned by -// the SND. Called on opt-out (TCP unset on a previously-publishable SND) -// and as part of the general networking-resources cleanup path. func (r *SeiNodeDeploymentReconciler) deletePublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { return r.deleteOrphanPublishableServices(ctx, group, nil) } -// deletePublishableServiceForOrdinal is the explicit per-ordinal delete -// scaleDown calls before deleting the child SeiNode. Idempotent — -// silently no-ops when the Service is already gone. +// orphanPublishableServices drops the SND owner ref so the Services survive +// SND deletion under DeletionPolicyRetain. +func (r *SeiNodeDeploymentReconciler) orphanPublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { + list := &corev1.ServiceList{} + if err := r.List(ctx, list, + client.InNamespace(group.Namespace), + client.MatchingLabels{ + groupLabel: group.Name, + publishableComponentLabel: publishableComponentValue, + }, + ); err != nil { + return fmt.Errorf("listing publishable Services for orphan: %w", err) + } + for i := range list.Items { + if err := r.removeOwnerRef(ctx, &list.Items[i], group); err != nil { + return fmt.Errorf("orphaning publishable Service %s: %w", list.Items[i].Name, err) + } + } + return nil +} + +// deletePublishableServiceForOrdinal removes the per-ordinal Service before +// scaleDown deletes the child. Idempotent. func (r *SeiNodeDeploymentReconciler) deletePublishableServiceForOrdinal(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { name := publishableServiceName(group, ordinal) svc := &corev1.Service{} @@ -236,12 +182,6 @@ func (r *SeiNodeDeploymentReconciler) deletePublishableServiceForOrdinal(ctx con return nil } -// generatePublishableService is a pure factory — given an SND, ordinal, -// and the deterministic vanity hostname, returns the desired Service -// spec. No external state, no API reads. The annotation set drives AWS -// LBC and external-dns; the selector reuses the SeiNode-controller's -// `sei.io/node=` label, which is stamped onto the pod by the -// StatefulSet template. func generatePublishableService(group *seiv1alpha1.SeiNodeDeployment, ordinal int, hostname string) *corev1.Service { name := publishableServiceName(group, ordinal) childName := seiNodeName(group, ordinal) @@ -271,7 +211,7 @@ func generatePublishableService(group *seiv1alpha1.SeiNodeDeployment, ordinal in Spec: corev1.ServiceSpec{ Type: corev1.ServiceTypeLoadBalancer, ExternalTrafficPolicy: corev1.ServiceExternalTrafficPolicyTypeLocal, - AllocateLoadBalancerNodePorts: new(bool), // explicit false; NLBs use IP targets, not NodePort routing + AllocateLoadBalancerNodePorts: new(bool), Selector: map[string]string{ noderesource.NodeLabel: childName, }, diff --git a/internal/controller/nodedeployment/publishable_test.go b/internal/controller/nodedeployment/publishable_test.go index d6463d30..1c65927b 100644 --- a/internal/controller/nodedeployment/publishable_test.go +++ b/internal/controller/nodedeployment/publishable_test.go @@ -15,78 +15,52 @@ import ( func TestPublishableHostname_Table(t *testing.T) { t.Parallel() cases := []struct { - name string - sndName string - namespace string - templateChainID string - genesisChainID string - ordinal int + name string + sndName string + templateChainID string + genesisChainID string + ordinal int publishableDomain string - want string + want string }{ { - name: "atlantic-2 ordinal 0", - sndName: "atlantic-2", - namespace: "sei-test-1", - templateChainID: "atlantic-2", - ordinal: 0, + name: "atlantic-2 ordinal 0", + sndName: "atlantic-2", + templateChainID: "atlantic-2", + ordinal: 0, publishableDomain: "prod.platform.sei.io", - want: "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", + want: "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", }, { - name: "ordinal 5 of multi-replica fleet", - sndName: "rpc", - namespace: "pacific-1", - templateChainID: "pacific-1", - ordinal: 5, + name: "ordinal 5 of multi-replica fleet", + sndName: "rpc", + templateChainID: "pacific-1", + ordinal: 5, publishableDomain: "prod.platform.sei.io", - want: "rpc-5-p2p.pacific-1.prod.platform.sei.io", + want: "rpc-5-p2p.pacific-1.prod.platform.sei.io", }, { - name: "genesis-only chainID (validator SND)", - sndName: "newchain-validators", - namespace: "ceremony", - templateChainID: "", - genesisChainID: "newchain-1", - ordinal: 1, + name: "genesis-only chainID (validator SND)", + sndName: "newchain-validators", + genesisChainID: "newchain-1", + ordinal: 1, publishableDomain: "test.platform.sei.io", - want: "newchain-validators-1-p2p.newchain-1.test.platform.sei.io", + want: "newchain-validators-1-p2p.newchain-1.test.platform.sei.io", }, { - name: "empty gateway public domain returns empty", - sndName: "atlantic-2", - namespace: "sei-test-1", - templateChainID: "atlantic-2", - ordinal: 0, + name: "empty publishable domain returns empty", + sndName: "atlantic-2", + templateChainID: "atlantic-2", + ordinal: 0, publishableDomain: "", - want: "", + want: "", }, { - name: "empty chain ID returns empty", - sndName: "atlantic-2", - namespace: "sei-test-1", - templateChainID: "", - ordinal: 0, + name: "empty chain ID returns empty", + sndName: "atlantic-2", + ordinal: 0, publishableDomain: "prod.platform.sei.io", - want: "", - }, - { - name: "chainID with uppercase fails the DNS-1123 belt-and-suspenders", - sndName: "atlantic-2", - namespace: "sei-test-1", - templateChainID: "Atlantic-2", - ordinal: 0, - publishableDomain: "prod.platform.sei.io", - want: "", - }, - { - name: "chainID with dot fails the DNS-1123 belt-and-suspenders", - sndName: "atlantic-2", - namespace: "sei-test-1", - templateChainID: "atlantic.2", - ordinal: 0, - publishableDomain: "prod.platform.sei.io", - want: "", + want: "", }, } for _, tc := range cases { @@ -94,7 +68,7 @@ func TestPublishableHostname_Table(t *testing.T) { t.Parallel() g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: tc.sndName, Namespace: tc.namespace}, + ObjectMeta: metav1.ObjectMeta{Name: tc.sndName}, Spec: seiv1alpha1.SeiNodeDeploymentSpec{ Template: seiv1alpha1.SeiNodeTemplate{ Spec: seiv1alpha1.SeiNodeSpec{ChainID: tc.templateChainID}, @@ -104,7 +78,7 @@ func TestPublishableHostname_Table(t *testing.T) { if tc.genesisChainID != "" { snd.Spec.Genesis = &seiv1alpha1.GenesisCeremonyConfig{ChainID: tc.genesisChainID} } - r := &SeiNodeDeploymentReconciler{PublishableDomain:tc.publishableDomain} + r := &SeiNodeDeploymentReconciler{PublishableDomain: tc.publishableDomain} g.Expect(r.publishableHostname(snd, tc.ordinal)).To(Equal(tc.want)) }) } @@ -113,143 +87,59 @@ func TestPublishableHostname_Table(t *testing.T) { func TestPublishableExternalAddress_AppendsP2PPort(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2"}, Spec: seiv1alpha1.SeiNodeDeploymentSpec{ Template: seiv1alpha1.SeiNodeTemplate{ Spec: seiv1alpha1.SeiNodeSpec{ChainID: "atlantic-2"}, }, }, } - r := &SeiNodeDeploymentReconciler{PublishableDomain:"prod.platform.sei.io"} - got := r.publishableExternalAddress(snd, 0) - g.Expect(got).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) + r := &SeiNodeDeploymentReconciler{PublishableDomain: "prod.platform.sei.io"} + g.Expect(r.publishableExternalAddress(snd, 0)).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) } func TestPublishableExternalAddress_EmptyWhenHostnameRejected(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2"}, } - r := &SeiNodeDeploymentReconciler{PublishableDomain:"prod.platform.sei.io"} + r := &SeiNodeDeploymentReconciler{PublishableDomain: "prod.platform.sei.io"} g.Expect(r.publishableExternalAddress(snd, 0)).To(BeEmpty()) } func TestPublishableServiceName(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2"}, } g.Expect(publishableServiceName(snd, 0)).To(Equal("atlantic-2-0-p2p")) g.Expect(publishableServiceName(snd, 7)).To(Equal("atlantic-2-7-p2p")) } -func TestGeneratePublishableService_Shape(t *testing.T) { +func TestGeneratePublishableService_Annotations(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, } - host := "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io" - - svc := generatePublishableService(snd, 0, host) - - g.Expect(svc.Name).To(Equal("atlantic-2-0-p2p")) - g.Expect(svc.Namespace).To(Equal("sei-test-1")) - - // Labels: SND group label, ordinal label, and the new component label - // so cleanup queries can target publishable Services without - // entangling other group-owned objects. - g.Expect(svc.Labels).To(HaveKeyWithValue(groupLabel, "atlantic-2")) - g.Expect(svc.Labels).To(HaveKeyWithValue(groupOrdinalLabel, "0")) - g.Expect(svc.Labels).To(HaveKeyWithValue(publishableComponentLabel, publishableComponentValue)) - - // Annotations: external-dns hostname pins the vanity → NLB CNAME; - // AWS LBC annotations select the NLB shape; managed-by is operator - // guidance, matching the HTTPRoute precedent. - g.Expect(svc.Annotations).To(HaveKeyWithValue(externalDNSHostnameAnnotation, host)) - g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBTypeAnnotation, awsLBTypeValue)) - g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBSchemeAnnotation, awsLBSchemeValue)) - g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBTargetTypeAnnotation, awsLBTargetTypeValue)) - g.Expect(svc.Annotations).To(HaveKeyWithValue(awsLBCrossZoneAnnotation, awsLBCrossZoneAttributeStr)) - g.Expect(svc.Annotations).To(HaveKeyWithValue(managedByAnnotation, controllerName)) + svc := generatePublishableService(snd, 0, "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io") + + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "external-dns.alpha.kubernetes.io/hostname", + "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", + )) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-type", "external")) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-scheme", "internet-facing")) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-nlb-target-type", "ip")) + g.Expect(svc.Annotations).To(HaveKeyWithValue( + "service.beta.kubernetes.io/aws-load-balancer-attributes", + "load_balancing.cross_zone.enabled=true")) g.Expect(svc.Spec.Type).To(Equal(corev1.ServiceTypeLoadBalancer)) g.Expect(svc.Spec.ExternalTrafficPolicy).To(Equal(corev1.ServiceExternalTrafficPolicyTypeLocal)) - g.Expect(svc.Spec.AllocateLoadBalancerNodePorts).NotTo(BeNil()) - g.Expect(*svc.Spec.AllocateLoadBalancerNodePorts).To(BeFalse()) - - // Selector matches the child SeiNode's pod via the `sei.io/node` - // label the SeiNode controller stamps on its StatefulSet template. - // Using that label (rather than the SND group label) ensures the - // NLB has exactly one healthy target — the per-replica pod — - // instead of round-robining across siblings. - g.Expect(svc.Spec.Selector).To(Equal(map[string]string{ - noderesource.NodeLabel: "atlantic-2-0", - })) - g.Expect(svc.Spec.Ports).To(HaveLen(1)) g.Expect(svc.Spec.Ports[0].Port).To(Equal(seiconfig.PortP2P)) - g.Expect(svc.Spec.Ports[0].Name).To(Equal("p2p")) - g.Expect(svc.Spec.Ports[0].Protocol).To(Equal(corev1.ProtocolTCP)) -} - -func TestEffectiveChainID_GenesisFallback(t *testing.T) { - g := NewWithT(t) - snd := &seiv1alpha1.SeiNodeDeployment{ - Spec: seiv1alpha1.SeiNodeDeploymentSpec{ - Template: seiv1alpha1.SeiNodeTemplate{ - Spec: seiv1alpha1.SeiNodeSpec{ChainID: ""}, - }, - Genesis: &seiv1alpha1.GenesisCeremonyConfig{ChainID: "newchain-1"}, - }, - } - g.Expect(effectiveChainID(snd)).To(Equal("newchain-1")) -} - -func TestEffectiveChainID_TemplateWins(t *testing.T) { - g := NewWithT(t) - snd := &seiv1alpha1.SeiNodeDeployment{ - Spec: seiv1alpha1.SeiNodeDeploymentSpec{ - Template: seiv1alpha1.SeiNodeTemplate{ - Spec: seiv1alpha1.SeiNodeSpec{ChainID: "pacific-1"}, - }, - Genesis: &seiv1alpha1.GenesisCeremonyConfig{ChainID: "should-be-ignored"}, - }, - } - g.Expect(effectiveChainID(snd)).To(Equal("pacific-1")) -} - -func TestExternalAddressEqual_NormalizesNilAndEmpty(t *testing.T) { - g := NewWithT(t) - empty := "" - val := "host:26656" - other := "other:26656" - g.Expect(externalAddressEqual(nil, nil)).To(BeTrue()) - g.Expect(externalAddressEqual(nil, &empty)).To(BeTrue()) - g.Expect(externalAddressEqual(&empty, nil)).To(BeTrue()) - g.Expect(externalAddressEqual(&val, &val)).To(BeTrue()) - g.Expect(externalAddressEqual(&val, &other)).To(BeFalse()) - g.Expect(externalAddressEqual(nil, &val)).To(BeFalse()) - g.Expect(externalAddressEqual(&val, nil)).To(BeFalse()) -} - -func TestNetworkingConfig_TCPEnabled(t *testing.T) { - t.Parallel() - tests := []struct { - name string - cfg *seiv1alpha1.NetworkingConfig - want bool - }{ - {name: "nil receiver", cfg: nil, want: false}, - {name: "legacy empty struct does not enable TCP", cfg: &seiv1alpha1.NetworkingConfig{}, want: false}, - {name: "HTTP-only does not enable TCP", cfg: &seiv1alpha1.NetworkingConfig{HTTP: &seiv1alpha1.HTTPConfig{}}, want: false}, - {name: "explicit TCP enables TCP", cfg: &seiv1alpha1.NetworkingConfig{TCP: &seiv1alpha1.TCPConfig{}}, want: true}, - {name: "HTTP and TCP both set", cfg: &seiv1alpha1.NetworkingConfig{HTTP: &seiv1alpha1.HTTPConfig{}, TCP: &seiv1alpha1.TCPConfig{}}, want: true}, - } - for _, tc := range tests { - t.Run(tc.name, func(t *testing.T) { - t.Parallel() - g := NewWithT(t) - g.Expect(tc.cfg.TCPEnabled()).To(Equal(tc.want)) - }) - } + g.Expect(svc.Spec.Selector).To(HaveKeyWithValue(noderesource.NodeLabel, "atlantic-2-0")) } diff --git a/internal/controller/nodedeployment/status.go b/internal/controller/nodedeployment/status.go index 3d422555..74312e33 100644 --- a/internal/controller/nodedeployment/status.go +++ b/internal/controller/nodedeployment/status.go @@ -118,12 +118,10 @@ func (r *SeiNodeDeploymentReconciler) buildNetworkingStatus(group *seiv1alpha1.S } } - if group.Spec.Networking.TCPEnabled() && r.PublishabilityAvailable { + if group.Spec.Networking.TCPEnabled() && r.PublishableDomain != "" { for i := range int(group.Spec.Replicas) { host := r.publishableExternalAddress(group, i) if host == "" { - // publishableHostname rejected this ordinal (invalid - // chainID); skip the entry rather than stamp a bad one. continue } out.PublishableEndpoints = append(out.PublishableEndpoints, seiv1alpha1.PublishableEndpoint{ diff --git a/internal/planner/common_overrides_test.go b/internal/planner/common_overrides_test.go index 4ba2ebf9..42ab6a16 100644 --- a/internal/planner/common_overrides_test.go +++ b/internal/planner/common_overrides_test.go @@ -5,7 +5,6 @@ import ( seiconfig "github.com/sei-protocol/sei-config" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" - "k8s.io/utils/ptr" seiv1alpha1 "github.com/sei-protocol/sei-k8s-controller/api/v1alpha1" ) @@ -14,7 +13,7 @@ func TestCommonOverrides_WithExternalAddress(t *testing.T) { node := &seiv1alpha1.SeiNode{ ObjectMeta: metav1.ObjectMeta{Name: "test-node"}, Spec: seiv1alpha1.SeiNodeSpec{ - ExternalAddress: ptr.To("p2p.atlantic-2.seinetwork.io:26656"), + ExternalAddress: "p2p.atlantic-2.seinetwork.io:26656", }, } overrides := commonOverrides(node) @@ -49,7 +48,7 @@ func TestCommonOverrides_UserOverrideTakesPrecedence(t *testing.T) { ChainID: "test-1", Image: "seid:v1", FullNode: &seiv1alpha1.FullNodeSpec{}, - ExternalAddress: ptr.To("lb.address:26656"), + ExternalAddress: "lb.address:26656", Overrides: map[string]string{ seiconfig.KeyP2PExternalAddress: "custom.address:26656", }, diff --git a/internal/planner/planner.go b/internal/planner/planner.go index 3b40d284..0b314418 100644 --- a/internal/planner/planner.go +++ b/internal/planner/planner.go @@ -694,8 +694,8 @@ func commonOverrides(node *seiv1alpha1.SeiNode) map[string]string { out := map[string]string{ "logging.level": "error", } - if node.Spec.ExternalAddress != nil && *node.Spec.ExternalAddress != "" { - out[seiconfig.KeyP2PExternalAddress] = *node.Spec.ExternalAddress + if node.Spec.ExternalAddress != "" { + out[seiconfig.KeyP2PExternalAddress] = node.Spec.ExternalAddress } return out } diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 7aab588b..75167670 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -274,18 +274,10 @@ spec: rule: (!has(oldSelf.import) || has(self.import)) externalAddress: description: |- - ExternalAddress is the routable P2P address — bare host:port, no - nodeId@ prefix — written into seid's `p2p.external_address`. Two - ownership models share this field: - - - SND-managed: when the parent SeiNodeDeployment has - `Spec.Networking.TCP` set, the SND reconciler injects this - value at child Create time (deterministic vanity hostname) and - re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on - the parent clears this field on every child. - - Standalone: a SeiNode created without an SND parent may set - this directly; the planner emits it verbatim. The controller - never overwrites a standalone SeiNode's value. + ExternalAddress is the routable P2P host:port written into seid's + `p2p.external_address`. SND-managed nodes get this stamped by the + SND reconciler when TCP networking is enabled. Standalone SeiNodes + can set it directly. type: string fullNode: description: FullNode configures a chain-following full node diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 0683bd2a..268716a0 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -135,18 +135,10 @@ spec: rule: (!has(oldSelf.import) || has(self.import)) externalAddress: description: |- - ExternalAddress is the routable P2P address — bare host:port, no - nodeId@ prefix — written into seid's `p2p.external_address`. Two - ownership models share this field: - - - SND-managed: when the parent SeiNodeDeployment has - `Spec.Networking.TCP` set, the SND reconciler injects this - value at child Create time (deterministic vanity hostname) and - re-syncs it via the diff in `ensureSeiNode`. Clearing TCP on - the parent clears this field on every child. - - Standalone: a SeiNode created without an SND parent may set - this directly; the planner emits it verbatim. The controller - never overwrites a standalone SeiNode's value. + ExternalAddress is the routable P2P host:port written into seid's + `p2p.external_address`. SND-managed nodes get this stamped by the + SND reconciler when TCP networking is enabled. Standalone SeiNodes + can set it directly. type: string fullNode: description: FullNode configures a chain-following full node (absorbs From 19e550c7fada58752bcb13e4fa3ea126b3d1b9db Mon Sep 17 00:00:00 2001 From: bdchatham Date: Thu, 28 May 2026 16:52:42 -0700 Subject: [PATCH 4/6] fix(publishable-p2p): address branch-new lint issues - gofmt main.go (struct field alignment after VPC CIDR removal) - dedupe `atlantic-2` and `prod.platform.sei.io` test literals via package-local consts; rename one duplicate `rpc` SND name to `validators`; reuse existing `testNamespace` for `pacific-1` `golangci-lint run --new-from-rev=main ./...` is now clean. Co-Authored-By: Claude Opus 4.7 (1M context) --- cmd/main.go | 16 ++++---- .../nodedeployment/publishable_test.go | 41 +++++++++++-------- 2 files changed, 31 insertions(+), 26 deletions(-) diff --git a/cmd/main.go b/cmd/main.go index fd6ca324..d8eb4498 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -218,14 +218,14 @@ func main() { //nolint:staticcheck // migrating to events.EventRecorder API is a separate effort recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") if err := (&nodedeploymentcontroller.SeiNodeDeploymentReconciler{ - Client: kc, - Scheme: mgr.GetScheme(), - Recorder: recorder, - GatewayName: platformCfg.GatewayName, - GatewayNamespace: platformCfg.GatewayNamespace, - GatewayDomain: platformCfg.GatewayDomain, - GatewayPublicDomain: platformCfg.GatewayPublicDomain, - PublishableDomain: publishableDomain, + Client: kc, + Scheme: mgr.GetScheme(), + Recorder: recorder, + GatewayName: platformCfg.GatewayName, + GatewayNamespace: platformCfg.GatewayNamespace, + GatewayDomain: platformCfg.GatewayDomain, + GatewayPublicDomain: platformCfg.GatewayPublicDomain, + PublishableDomain: publishableDomain, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { var assemblerNode *seiv1alpha1.SeiNode diff --git a/internal/controller/nodedeployment/publishable_test.go b/internal/controller/nodedeployment/publishable_test.go index 1c65927b..f80e4b63 100644 --- a/internal/controller/nodedeployment/publishable_test.go +++ b/internal/controller/nodedeployment/publishable_test.go @@ -12,6 +12,11 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/noderesource" ) +const ( + testChainAtlantic = "atlantic-2" + testPublishableProd = "prod.platform.sei.io" +) + func TestPublishableHostname_Table(t *testing.T) { t.Parallel() cases := []struct { @@ -25,19 +30,19 @@ func TestPublishableHostname_Table(t *testing.T) { }{ { name: "atlantic-2 ordinal 0", - sndName: "atlantic-2", - templateChainID: "atlantic-2", + sndName: testChainAtlantic, + templateChainID: testChainAtlantic, ordinal: 0, - publishableDomain: "prod.platform.sei.io", + publishableDomain: testPublishableProd, want: "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", }, { name: "ordinal 5 of multi-replica fleet", - sndName: "rpc", - templateChainID: "pacific-1", + sndName: "validators", + templateChainID: testNamespace, ordinal: 5, - publishableDomain: "prod.platform.sei.io", - want: "rpc-5-p2p.pacific-1.prod.platform.sei.io", + publishableDomain: testPublishableProd, + want: "validators-5-p2p.pacific-1.prod.platform.sei.io", }, { name: "genesis-only chainID (validator SND)", @@ -49,17 +54,17 @@ func TestPublishableHostname_Table(t *testing.T) { }, { name: "empty publishable domain returns empty", - sndName: "atlantic-2", - templateChainID: "atlantic-2", + sndName: testChainAtlantic, + templateChainID: testChainAtlantic, ordinal: 0, publishableDomain: "", want: "", }, { name: "empty chain ID returns empty", - sndName: "atlantic-2", + sndName: testChainAtlantic, ordinal: 0, - publishableDomain: "prod.platform.sei.io", + publishableDomain: testPublishableProd, want: "", }, } @@ -87,30 +92,30 @@ func TestPublishableHostname_Table(t *testing.T) { func TestPublishableExternalAddress_AppendsP2PPort(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2"}, + ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic}, Spec: seiv1alpha1.SeiNodeDeploymentSpec{ Template: seiv1alpha1.SeiNodeTemplate{ - Spec: seiv1alpha1.SeiNodeSpec{ChainID: "atlantic-2"}, + Spec: seiv1alpha1.SeiNodeSpec{ChainID: testChainAtlantic}, }, }, } - r := &SeiNodeDeploymentReconciler{PublishableDomain: "prod.platform.sei.io"} + r := &SeiNodeDeploymentReconciler{PublishableDomain: testPublishableProd} g.Expect(r.publishableExternalAddress(snd, 0)).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) } func TestPublishableExternalAddress_EmptyWhenHostnameRejected(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2"}, + ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic}, } - r := &SeiNodeDeploymentReconciler{PublishableDomain: "prod.platform.sei.io"} + r := &SeiNodeDeploymentReconciler{PublishableDomain: testPublishableProd} g.Expect(r.publishableExternalAddress(snd, 0)).To(BeEmpty()) } func TestPublishableServiceName(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2"}, + ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic}, } g.Expect(publishableServiceName(snd, 0)).To(Equal("atlantic-2-0-p2p")) g.Expect(publishableServiceName(snd, 7)).To(Equal("atlantic-2-7-p2p")) @@ -119,7 +124,7 @@ func TestPublishableServiceName(t *testing.T) { func TestGeneratePublishableService_Annotations(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ - ObjectMeta: metav1.ObjectMeta{Name: "atlantic-2", Namespace: "sei-test-1"}, + ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic, Namespace: "sei-test-1"}, } svc := generatePublishableService(snd, 0, "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io") From 5e44109ef6700d4be187f602a48e519edeefcdf3 Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 10:30:51 -0700 Subject: [PATCH 5/6] =?UTF-8?q?refactor(publishable=E2=86=92p2pEndpoint):?= =?UTF-8?q?=20rename=20publishable*=20to=20p2pEndpoint*?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit "publishable" was internal jargon. Renamed to "p2pEndpoint" throughout for a self-documenting concept name. No behavior change. File renames: - publishable.go → p2p_endpoint.go - publishable_test.go → p2p_endpoint_test.go - envtest/publishable_p2p_test.go → envtest/p2p_endpoint_test.go API/CRD renames: - PublishableEndpoint → P2PEndpoint (struct + json:"p2pEndpoint") - PublishableEndpoints → P2PEndpoints (slice + json:"p2pEndpoints") - PublishableDomain → P2PEndpointDomain (reconciler field) - SEI_PUBLISHABLE_DOMAIN → SEI_P2P_ENDPOINT_DOMAIN (env) - PublishableServicesApplied → P2PEndpointsApplied (condition reason) - Service label value sei.io/component=p2p-lb → p2p-endpoint - Field manager nodedeployment-controller-publishable → -p2p-endpoint Plus a comment-conciseness sweep on the three doc-blocks called out in review (routeHostnameResolvable, reconcileNetworking, scaleDown's per-ordinal delete). Refs sei-protocol/sei-k8s-controller#365 Co-Authored-By: Claude Opus 4.7 (1M context) --- api/v1alpha1/seinode_types.go | 2 +- api/v1alpha1/seinodedeployment_types.go | 10 +- api/v1alpha1/zz_generated.deepcopy.go | 36 +++---- cmd/main.go | 4 +- config/crd/sei.io_seinodedeployments.yaml | 10 +- config/crd/sei.io_seinodes.yaml | 2 +- .../controller/nodedeployment/controller.go | 6 +- ...hable_p2p_test.go => p2p_endpoint_test.go} | 94 +++++++++--------- .../nodedeployment/envtest/suite_test.go | 2 +- .../controller/nodedeployment/networking.go | 42 +++----- internal/controller/nodedeployment/nodes.go | 21 ++-- .../{publishable.go => p2p_endpoint.go} | 95 +++++++++---------- ...blishable_test.go => p2p_endpoint_test.go} | 40 ++++---- internal/controller/nodedeployment/status.go | 8 +- manifests/sei.io_seinodedeployments.yaml | 10 +- manifests/sei.io_seinodes.yaml | 2 +- 16 files changed, 182 insertions(+), 202 deletions(-) rename internal/controller/nodedeployment/envtest/{publishable_p2p_test.go => p2p_endpoint_test.go} (82%) rename internal/controller/nodedeployment/{publishable.go => p2p_endpoint.go} (58%) rename internal/controller/nodedeployment/{publishable_test.go => p2p_endpoint_test.go} (77%) diff --git a/api/v1alpha1/seinode_types.go b/api/v1alpha1/seinode_types.go index 9fe598dc..07b98c35 100644 --- a/api/v1alpha1/seinode_types.go +++ b/api/v1alpha1/seinode_types.go @@ -14,7 +14,7 @@ import ( type SeiNodeSpec struct { // ChainID of the chain this node belongs to. // Constrained to DNS-1123 label characters because the controller composes - // it into publishable hostnames (e.g. `-p2p..`) when + // it into P2P endpoint hostnames (e.g. `-p2p..`) when // the parent SND opts into TCP networking; the address is a one-way door // once peers cache it. // +kubebuilder:validation:MinLength=1 diff --git a/api/v1alpha1/seinodedeployment_types.go b/api/v1alpha1/seinodedeployment_types.go index 7d9dc755..2aa97c52 100644 --- a/api/v1alpha1/seinodedeployment_types.go +++ b/api/v1alpha1/seinodedeployment_types.go @@ -72,7 +72,7 @@ type UpdateStrategy struct { type GenesisCeremonyConfig struct { // ChainID for the new network. // Constrained to DNS-1123 label characters because child SeiNodes - // compose it into publishable hostnames when the SND has + // compose it into P2P endpoint hostnames when the SND has // `spec.networking.tcp` set; the address is a one-way door once peers // cache it. // +kubebuilder:validation:MinLength=1 @@ -348,7 +348,7 @@ type NetworkingStatus struct { // +optional Routes []RouteStatus `json:"routes,omitempty"` - // PublishableEndpoints lists the per-ordinal publishable P2P + // P2PEndpoints lists the per-ordinal P2P endpoint // hostnames stamped by the SND when `spec.networking.tcp` is set. // Each entry mirrors the value injected into the child SeiNode's // `spec.externalAddress` (hostname:port). Hostnames are deterministic @@ -357,7 +357,7 @@ type NetworkingStatus struct { // +listType=map // +listMapKey=ordinal // +optional - PublishableEndpoints []PublishableEndpoint `json:"publishableEndpoints,omitempty"` + P2PEndpoints []P2PEndpoint `json:"p2pEndpoints,omitempty"` } // RouteStatus is the observed state of a single HTTPRoute hostname. @@ -370,9 +370,9 @@ type RouteStatus struct { Protocol string `json:"protocol,omitempty"` } -// PublishableEndpoint is the observed state of one child's publishable +// P2PEndpoint is the observed state of one child's publishable // P2P endpoint. Stamped by the SND networking reconciler. -type PublishableEndpoint struct { +type P2PEndpoint struct { // Ordinal is the child's replica index within the SND // (matches `sei.io/nodedeployment-ordinal`). Ordinal int32 `json:"ordinal"` diff --git a/api/v1alpha1/zz_generated.deepcopy.go b/api/v1alpha1/zz_generated.deepcopy.go index 0a9bf9ca..f9777d76 100644 --- a/api/v1alpha1/zz_generated.deepcopy.go +++ b/api/v1alpha1/zz_generated.deepcopy.go @@ -466,9 +466,9 @@ func (in *NetworkingStatus) DeepCopyInto(out *NetworkingStatus) { *out = make([]RouteStatus, len(*in)) copy(*out, *in) } - if in.PublishableEndpoints != nil { - in, out := &in.PublishableEndpoints, &out.PublishableEndpoints - *out = make([]PublishableEndpoint, len(*in)) + if in.P2PEndpoints != nil { + in, out := &in.P2PEndpoints, &out.P2PEndpoints + *out = make([]P2PEndpoint, len(*in)) copy(*out, *in) } } @@ -538,6 +538,21 @@ func (in *OperatorKeyringSource) DeepCopy() *OperatorKeyringSource { return out } +// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. +func (in *P2PEndpoint) DeepCopyInto(out *P2PEndpoint) { + *out = *in +} + +// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new P2PEndpoint. +func (in *P2PEndpoint) DeepCopy() *P2PEndpoint { + if in == nil { + return nil + } + out := new(P2PEndpoint) + in.DeepCopyInto(out) + return out +} + // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *PassphraseSecretRef) DeepCopyInto(out *PassphraseSecretRef) { *out = *in @@ -638,21 +653,6 @@ func (in *PlannedTask) DeepCopy() *PlannedTask { return out } -// DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. -func (in *PublishableEndpoint) DeepCopyInto(out *PublishableEndpoint) { - *out = *in -} - -// DeepCopy is an autogenerated deepcopy function, copying the receiver, creating a new PublishableEndpoint. -func (in *PublishableEndpoint) DeepCopy() *PublishableEndpoint { - if in == nil { - return nil - } - out := new(PublishableEndpoint) - in.DeepCopyInto(out) - return out -} - // DeepCopyInto is an autogenerated deepcopy function, copying the receiver, writing into out. in must be non-nil. func (in *ReplayerSpec) DeepCopyInto(out *ReplayerSpec) { *out = *in diff --git a/cmd/main.go b/cmd/main.go index d8eb4498..1d87d0d2 100644 --- a/cmd/main.go +++ b/cmd/main.go @@ -213,7 +213,7 @@ func main() { os.Exit(1) } - publishableDomain := os.Getenv("SEI_PUBLISHABLE_DOMAIN") + p2pEndpointDomain := os.Getenv("SEI_P2P_ENDPOINT_DOMAIN") //nolint:staticcheck // migrating to events.EventRecorder API is a separate effort recorder := mgr.GetEventRecorderFor("seinodedeployment-controller") @@ -225,7 +225,7 @@ func main() { GatewayNamespace: platformCfg.GatewayNamespace, GatewayDomain: platformCfg.GatewayDomain, GatewayPublicDomain: platformCfg.GatewayPublicDomain, - PublishableDomain: publishableDomain, + P2PEndpointDomain: p2pEndpointDomain, PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { var assemblerNode *seiv1alpha1.SeiNode diff --git a/config/crd/sei.io_seinodedeployments.yaml b/config/crd/sei.io_seinodedeployments.yaml index 75167670..56b6d451 100644 --- a/config/crd/sei.io_seinodedeployments.yaml +++ b/config/crd/sei.io_seinodedeployments.yaml @@ -109,7 +109,7 @@ spec: description: |- ChainID for the new network. Constrained to DNS-1123 label characters because child SeiNodes - compose it into publishable hostnames when the SND has + compose it into P2P endpoint hostnames when the SND has `spec.networking.tcp` set; the address is a one-way door once peers cache it. maxLength: 64 @@ -230,7 +230,7 @@ spec: description: |- ChainID of the chain this node belongs to. Constrained to DNS-1123 label characters because the controller composes - it into publishable hostnames (e.g. `-p2p..`) when + it into P2P endpoint hostnames (e.g. `-p2p..`) when the parent SND opts into TCP networking; the address is a one-way door once peers cache it. minLength: 1 @@ -1090,9 +1090,9 @@ spec: description: NetworkingStatus reports the observed state of networking resources. properties: - publishableEndpoints: + p2pEndpoints: description: |- - PublishableEndpoints lists the per-ordinal publishable P2P + P2PEndpoints lists the per-ordinal P2P endpoint hostnames stamped by the SND when `spec.networking.tcp` is set. Each entry mirrors the value injected into the child SeiNode's `spec.externalAddress` (hostname:port). Hostnames are deterministic @@ -1100,7 +1100,7 @@ spec: Service status back. items: description: |- - PublishableEndpoint is the observed state of one child's publishable + P2PEndpoint is the observed state of one child's publishable P2P endpoint. Stamped by the SND networking reconciler. properties: hostname: diff --git a/config/crd/sei.io_seinodes.yaml b/config/crd/sei.io_seinodes.yaml index 268716a0..8b2f86e2 100644 --- a/config/crd/sei.io_seinodes.yaml +++ b/config/crd/sei.io_seinodes.yaml @@ -91,7 +91,7 @@ spec: description: |- ChainID of the chain this node belongs to. Constrained to DNS-1123 label characters because the controller composes - it into publishable hostnames (e.g. `-p2p..`) when + it into P2P endpoint hostnames (e.g. `-p2p..`) when the parent SND opts into TCP networking; the address is a one-way door once peers cache it. minLength: 1 diff --git a/internal/controller/nodedeployment/controller.go b/internal/controller/nodedeployment/controller.go index ea543eb1..799160b7 100644 --- a/internal/controller/nodedeployment/controller.go +++ b/internal/controller/nodedeployment/controller.go @@ -42,9 +42,9 @@ type SeiNodeDeploymentReconciler struct { GatewayDomain string GatewayPublicDomain string - // PublishableDomain is the DNS zone for per-pod publishable hostnames, - // from SEI_PUBLISHABLE_DOMAIN. Empty disables the publishable path. - PublishableDomain string + // P2PEndpointDomain is the DNS zone for per-pod P2P endpoint hostnames, + // from SEI_P2P_ENDPOINT_DOMAIN. Empty disables the P2P endpoint path. + P2PEndpointDomain string // PlanExecutor drives group-level task plans (e.g. genesis assembly). PlanExecutor planner.PlanExecutor[*seiv1alpha1.SeiNodeDeployment] diff --git a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go b/internal/controller/nodedeployment/envtest/p2p_endpoint_test.go similarity index 82% rename from internal/controller/nodedeployment/envtest/publishable_p2p_test.go rename to internal/controller/nodedeployment/envtest/p2p_endpoint_test.go index 76d6163d..e4477e04 100644 --- a/internal/controller/nodedeployment/envtest/publishable_p2p_test.go +++ b/internal/controller/nodedeployment/envtest/p2p_endpoint_test.go @@ -18,23 +18,23 @@ import ( "github.com/sei-protocol/sei-k8s-controller/internal/controller/nodedeployment/envtest/fixtures" ) -const publishableTestDomain = "test.platform.sei.io" +const p2pEndpointTestDomain = "test.platform.sei.io" -func expectedPublishableHost(sndName, chainID string, ordinal int) string { - return fmt.Sprintf("%s-%d-p2p.%s.%s", sndName, ordinal, chainID, publishableTestDomain) +func expectedP2PEndpointHost(sndName, chainID string, ordinal int) string { + return fmt.Sprintf("%s-%d-p2p.%s.%s", sndName, ordinal, chainID, p2pEndpointTestDomain) } -func expectedPublishableAddr(sndName, chainID string, ordinal int) string { - return expectedPublishableHost(sndName, chainID, ordinal) + ":26656" +func expectedP2PEndpointAddr(sndName, chainID string, ordinal int) string { + return expectedP2PEndpointHost(sndName, chainID, ordinal) + ":26656" } -// withPublishableDomain sets the test domain on the shared reconciler and +// withP2PEndpointDomain sets the test domain on the shared reconciler and // restores it after the test. -func withPublishableDomain(t *testing.T, domain string) { +func withP2PEndpointDomain(t *testing.T, domain string) { t.Helper() - prev := testSNDReconciler.PublishableDomain - testSNDReconciler.PublishableDomain = domain - t.Cleanup(func() { testSNDReconciler.PublishableDomain = prev }) + prev := testSNDReconciler.P2PEndpointDomain + testSNDReconciler.P2PEndpointDomain = domain + t.Cleanup(func() { testSNDReconciler.P2PEndpointDomain = prev }) } // withTCP enables `spec.networking.tcp` on a fixtures-built SND. Goes @@ -50,10 +50,10 @@ func withTCP() fixtures.Option { } // Happy path: TCP-enabled SND produces a child with Spec.ExternalAddress, -// a per-pod LB Service, and a PublishableEndpoints entry. -func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing.T) { +// a per-pod LB Service, and a P2PEndpoints entry. +func TestP2PEndpointP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, publishableTestDomain) + withP2PEndpointDomain(t, p2pEndpointTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-create", @@ -63,8 +63,8 @@ func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) const chainID = "pacific-1" // fixtures default - wantHost := expectedPublishableHost(snd.Name, chainID, 0) - wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + wantHost := expectedP2PEndpointHost(snd.Name, chainID, 0) + wantAddr := expectedP2PEndpointAddr(snd.Name, chainID, 0) // 1. Child SeiNode is created with Spec.ExternalAddress already set. // The brief calls out "child appears in the API with the value @@ -83,7 +83,7 @@ func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing waitFor(t, func() bool { svc := &corev1.Service{} return testCli.Get(testCtx, svcKey, svc) == nil - }, "publishable Service "+svcKey.Name+" created") + }, "P2P endpoint Service "+svcKey.Name+" created") svc := &corev1.Service{} g.Expect(testCli.Get(testCtx, svcKey, svc)).To(Succeed()) @@ -107,23 +107,23 @@ func TestPublishableP2P_CreateWithTCP_ChildHasAddressAndServiceExists(t *testing g.Expect(refs[0].Controller).NotTo(BeNil()) g.Expect(*refs[0].Controller).To(BeTrue()) - // 4. PublishableEndpoints surfaces the same hostname the child got. + // 4. P2PEndpoints surfaces the same hostname the child got. waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { ns := latest.Status.NetworkingStatus - if ns == nil || len(ns.PublishableEndpoints) != 1 { + if ns == nil || len(ns.P2PEndpoints) != 1 { return false } - ep := ns.PublishableEndpoints[0] + ep := ns.P2PEndpoints[0] return ep.Ordinal == 0 && ep.SeiNodeName == snd.Name+"-0" && ep.Hostname == wantAddr - }, "PublishableEndpoints stamped with deterministic vanity host") + }, "P2PEndpoints stamped with deterministic vanity host") } // Opt-out: removing TCP clears Spec.ExternalAddress and deletes the Service. -func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { +func TestP2PEndpointP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, publishableTestDomain) + withP2PEndpointDomain(t, p2pEndpointTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-optout", @@ -133,7 +133,7 @@ func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) const chainID = "pacific-1" - wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + wantAddr := expectedP2PEndpointAddr(snd.Name, chainID, 0) childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} @@ -147,7 +147,7 @@ func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { }, "child has publishable address before opt-out") waitFor(t, func() bool { return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil - }, "publishable Service exists before opt-out") + }, "P2P endpoint Service exists before opt-out") // Opt out: clear TCP. The SND retains an empty `networking` block // (HTTPEnabled() back-compat will treat it as HTTP-enabled, but the @@ -172,19 +172,19 @@ func TestPublishableP2P_OptOut_ClearsAddressAndDeletesService(t *testing.T) { waitFor(t, func() bool { err := testCli.Get(testCtx, svcKey, &corev1.Service{}) return apierrors.IsNotFound(err) - }, "publishable Service deleted after opt-out") + }, "P2P endpoint Service deleted after opt-out") - // PublishableEndpoints is cleared (or NetworkingStatus is nil). + // P2PEndpoints is cleared (or NetworkingStatus is nil). waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { return latest.Status.NetworkingStatus == nil || - len(latest.Status.NetworkingStatus.PublishableEndpoints) == 0 - }, "PublishableEndpoints cleared after opt-out") + len(latest.Status.NetworkingStatus.P2PEndpoints) == 0 + }, "P2PEndpoints cleared after opt-out") } // Re-opt-in: TCP restored brings the address back and recreates the Service. -func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { +func TestP2PEndpointP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, publishableTestDomain) + withP2PEndpointDomain(t, p2pEndpointTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-reoptin", @@ -194,7 +194,7 @@ func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) const chainID = "pacific-1" - wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + wantAddr := expectedP2PEndpointAddr(snd.Name, chainID, 0) childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} svcKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} @@ -234,13 +234,13 @@ func TestPublishableP2P_ReOptIn_RestoresAddressAndService(t *testing.T) { // Service is recreated. waitFor(t, func() bool { return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil - }, "publishable Service recreated after re-opt-in") + }, "P2P endpoint Service recreated after re-opt-in") } // Scale-down deletes the ordinal's Service before the child SeiNode. -func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) { +func TestP2PEndpointP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, publishableTestDomain) + withP2PEndpointDomain(t, p2pEndpointTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-scale", @@ -249,12 +249,12 @@ func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) ) g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) - // Wait for both replicas to converge with their publishable Services. + // Wait for both replicas to converge with their P2P endpoint Services. for i := 0; i < 2; i++ { svcKey := types.NamespacedName{Name: fmt.Sprintf("%s-%d-p2p", snd.Name, i), Namespace: ns} waitFor(t, func() bool { return testCli.Get(testCtx, svcKey, &corev1.Service{}) == nil - }, fmt.Sprintf("publishable Service %s created", svcKey.Name)) + }, fmt.Sprintf("P2P endpoint Service %s created", svcKey.Name)) } // Scale down to 1 replica. @@ -268,7 +268,7 @@ func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) waitFor(t, func() bool { err := testCli.Get(testCtx, scaledOutKey, &corev1.Service{}) return apierrors.IsNotFound(err) - }, "ordinal-1 publishable Service deleted on scale-down") + }, "ordinal-1 P2P endpoint Service deleted on scale-down") // ordinal-0 Service survives. survivingKey := types.NamespacedName{Name: snd.Name + "-0-p2p", Namespace: ns} @@ -278,9 +278,9 @@ func TestPublishableP2P_ScaleDown_DeletesOrdinalServiceBeforeChild(t *testing.T) } // Standalone SeiNodes own Spec.ExternalAddress directly; no SND touches it. -func TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress(t *testing.T) { +func TestP2PEndpointP2P_StandaloneSeiNode_PreservesUserAddress(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, publishableTestDomain) + withP2PEndpointDomain(t, p2pEndpointTestDomain) ns := makeNamespace(t) addr := "custom.example.com:26656" @@ -308,9 +308,9 @@ func TestPublishableP2P_StandaloneSeiNode_PreservesUserAddress(t *testing.T) { } // kubectl-edit stomp: external clear of Spec.ExternalAddress reconverges via ensureSeiNode. -func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing.T) { +func TestP2PEndpointP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, publishableTestDomain) + withP2PEndpointDomain(t, p2pEndpointTestDomain) ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-stomp", @@ -320,7 +320,7 @@ func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing. g.Expect(testCli.Create(testCtx, snd)).To(Succeed()) const chainID = "pacific-1" - wantAddr := expectedPublishableAddr(snd.Name, chainID, 0) + wantAddr := expectedP2PEndpointAddr(snd.Name, chainID, 0) childKey := types.NamespacedName{Name: snd.Name + "-0", Namespace: ns} waitFor(t, func() bool { @@ -369,12 +369,12 @@ func TestPublishableP2P_KubectlEditStomp_ReconverresViaEnsureSeiNode(t *testing. }, "child reconverged via ensureSeiNode after external stomp") } -// TestPublishableP2P_NoDomainConfigured_SkipsServices verifies the silent -// no-op path: when PublishableDomain is unset, TCP-enabled SNDs get no -// publishable Service and no child ExternalAddress. -func TestPublishableP2P_NoDomainConfigured_SkipsServices(t *testing.T) { +// TestP2PEndpointP2P_NoDomainConfigured_SkipsServices verifies the silent +// no-op path: when P2PEndpointDomain is unset, TCP-enabled SNDs get no +// P2P endpoint Service and no child ExternalAddress. +func TestP2PEndpointP2P_NoDomainConfigured_SkipsServices(t *testing.T) { g := NewWithT(t) - withPublishableDomain(t, "") + withP2PEndpointDomain(t, "") ns := makeNamespace(t) snd := fixtures.NewSND(ns, "pub-no-domain", diff --git a/internal/controller/nodedeployment/envtest/suite_test.go b/internal/controller/nodedeployment/envtest/suite_test.go index ffe97fbb..40e4aa35 100644 --- a/internal/controller/nodedeployment/envtest/suite_test.go +++ b/internal/controller/nodedeployment/envtest/suite_test.go @@ -207,7 +207,7 @@ func run(m *testing.M) (int, error) { GatewayNamespace: "gateway", GatewayDomain: "test.local", GatewayPublicDomain: "", - PublishableDomain: "", + P2PEndpointDomain: "", PlanExecutor: &planner.Executor[*seiv1alpha1.SeiNodeDeployment]{ ConfigFor: func(_ context.Context, group *seiv1alpha1.SeiNodeDeployment) task.ExecutionConfig { return task.ExecutionConfig{ diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index be316b3b..498b6f3f 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -38,15 +38,8 @@ type effectiveRoute struct { WSPort int32 } -// routeHostnameResolvable returns true when the deployment's first public -// HTTP hostname resolves in DNS, indicating the HTTPRoute + External-DNS -// pipeline is ready. Returns true when no HTTP routes are expected -// (TCP-only deployments, validator mode, private deployments). -// -// The publishable-P2P hostname has no parallel resolvability gate. CometBFT -// peers retry; the controller does not block STS creation on external-dns -// catching up — that would re-introduce the layering inversion the original -// PR #362 design suffered from. +// routeHostnameResolvable reports whether the first HTTP route hostname +// resolves in DNS. Returns true when no HTTP routes are expected. func (r *SeiNodeDeploymentReconciler) routeHostnameResolvable(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) bool { if !group.Spec.Networking.HTTPEnabled() { return true @@ -63,18 +56,9 @@ func (r *SeiNodeDeploymentReconciler) routeHostnameResolvable(ctx context.Contex return true } -// reconcileNetworking dispatches to two independent sub-reconcilers gated -// on independent presence checks. HTTP (L7 Gateway exposure) and TCP -// (per-pod L4 NLB for P2P) share only the group-level resource labels, -// the `fieldOwner` constant, and the `ConditionNetworkingReady` condition -// vocabulary; they do not nest. -// -// Condition contract: each branch sets `ConditionNetworkingReady` to -// reflect *its* terminal state. When both are enabled, the TCP branch -// runs second and its outcome wins; that's deliberate — TCP is the -// load-bearing path for publishable validators, and the operator wants -// to know its readiness first. When neither is enabled, -// `NetworkingDisabled/false` describes the steady state. +// reconcileNetworking runs the HTTP (L7 Gateway) and TCP (per-pod L4 NLB) +// sub-reconcilers independently. Both write ConditionNetworkingReady; when +// both are enabled the TCP branch runs second and its outcome wins. func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { if group.Spec.Networking == nil { setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, @@ -98,13 +82,13 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g } } - if group.Spec.Networking.TCPEnabled() && r.PublishableDomain != "" { - if err := r.reconcilePublishableServices(ctx, group); err != nil { - return fmt.Errorf("reconciling publishable services: %w", err) + if group.Spec.Networking.TCPEnabled() && r.P2PEndpointDomain != "" { + if err := r.reconcileP2PEndpoints(ctx, group); err != nil { + return fmt.Errorf("reconciling P2P endpoints: %w", err) } } else { - if err := r.deletePublishableServices(ctx, group); err != nil { - return fmt.Errorf("deleting publishable services: %w", err) + if err := r.deleteP2PEndpoints(ctx, group); err != nil { + return fmt.Errorf("deleting P2P endpoints: %w", err) } } @@ -403,8 +387,8 @@ func (r *SeiNodeDeploymentReconciler) deleteNetworkingResources(ctx context.Cont if err := r.deleteHTTPRoutesByLabel(ctx, group); err != nil { return fmt.Errorf("deleting HTTPRoutes: %w", err) } - if err := r.deletePublishableServices(ctx, group); err != nil { - return fmt.Errorf("deleting publishable Services: %w", err) + if err := r.deleteP2PEndpoints(ctx, group); err != nil { + return fmt.Errorf("deleting P2P endpoint Services: %w", err) } return nil } @@ -433,7 +417,7 @@ func (r *SeiNodeDeploymentReconciler) orphanNetworkingResources(ctx context.Cont } } - if err := r.orphanPublishableServices(ctx, group); err != nil { + if err := r.orphanP2PEndpoints(ctx, group); err != nil { return err } diff --git a/internal/controller/nodedeployment/nodes.go b/internal/controller/nodedeployment/nodes.go index c00b9a79..1abcd4b0 100644 --- a/internal/controller/nodedeployment/nodes.go +++ b/internal/controller/nodedeployment/nodes.go @@ -175,7 +175,7 @@ func (r *SeiNodeDeploymentReconciler) populateIncumbentNodes(ctx context.Context func (r *SeiNodeDeploymentReconciler) ensureSeiNode(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { desired := generateSeiNode(group, ordinal) - desired.Spec.ExternalAddress = r.publishableExternalAddressForChild(group, ordinal) + desired.Spec.ExternalAddress = r.p2pEndpointAddressForChild(group, ordinal) if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { return fmt.Errorf("setting owner reference: %w", err) } @@ -273,13 +273,13 @@ func generateSeiNode(group *seiv1alpha1.SeiNodeDeployment, ordinal int) *seiv1al } } -// publishableExternalAddressForChild returns the vanity host:port when the +// p2pEndpointAddressForChild returns the vanity host:port when the // SND opts into TCP networking, "" otherwise. -func (r *SeiNodeDeploymentReconciler) publishableExternalAddressForChild(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { +func (r *SeiNodeDeploymentReconciler) p2pEndpointAddressForChild(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { if !group.Spec.Networking.TCPEnabled() { return "" } - return r.publishableExternalAddress(group, ordinal) + return r.p2pEndpointAddress(group, ordinal) } // scaleDown deletes SeiNodes with ordinals >= the desired replica count. @@ -310,14 +310,11 @@ func (r *SeiNodeDeploymentReconciler) scaleDown(ctx context.Context, group *seiv continue } if ord >= int(group.Spec.Replicas) { - // Delete the per-ordinal publishable Service before the child - // SeiNode. Both are SND-owned, so owner-ref cascade would - // only fire on SND deletion — leaving the NLB stranded with - // no targets between scale-down and SND delete is the failure - // we explicitly avoid. Idempotent: no-op when publishability - // isn't in use. - if err := r.deletePublishableServiceForOrdinal(ctx, group, ord); err != nil { - return fmt.Errorf("deleting publishable Service for scaled-down ordinal %d: %w", ord, err) + // Delete the per-ordinal P2P endpoint Service before the + // child so the NLB is not stranded between scale-down and + // SND delete. Idempotent. + if err := r.deleteP2PEndpointForOrdinal(ctx, group, ord); err != nil { + return fmt.Errorf("deleting P2P endpoint Service for scaled-down ordinal %d: %w", ord, err) } if err := r.Delete(ctx, node); err != nil && !apierrors.IsNotFound(err) { return fmt.Errorf("deleting excess SeiNode %s: %w", node.Name, err) diff --git a/internal/controller/nodedeployment/publishable.go b/internal/controller/nodedeployment/p2p_endpoint.go similarity index 58% rename from internal/controller/nodedeployment/publishable.go rename to internal/controller/nodedeployment/p2p_endpoint.go index d1077928..dd9b0a31 100644 --- a/internal/controller/nodedeployment/publishable.go +++ b/internal/controller/nodedeployment/p2p_endpoint.go @@ -1,7 +1,6 @@ -// Package nodedeployment — publishable.go is the SND-side sub-reconciler -// for per-pod P2P LoadBalancer Services. Hostnames are derived from SND -// identity; external-dns creates the CNAME to the NLB. The child SeiNode -// reconciler is unaware and just reads Spec.ExternalAddress. +// p2p_endpoint.go provisions per-pod P2P endpoint Services (LoadBalancer) +// and the deterministic vanity hostname for each. external-dns CNAMEs the +// hostname to the NLB; the child SeiNode reads Spec.ExternalAddress. package nodedeployment import ( @@ -23,10 +22,10 @@ import ( ) const ( - publishableFieldOwner = client.FieldOwner("nodedeployment-controller-publishable") + p2pEndpointFieldOwner = client.FieldOwner("nodedeployment-controller-p2p-endpoint") - publishableComponentLabel = "sei.io/component" - publishableComponentValue = "p2p-lb" + p2pEndpointComponentLabel = "sei.io/component" + p2pEndpointComponentValue = "p2p-endpoint" externalDNSHostnameAnnotation = "external-dns.alpha.kubernetes.io/hostname" @@ -39,7 +38,7 @@ const ( awsLBTargetTypeValue = "ip" awsLBCrossZoneAttributeStr = "load_balancing.cross_zone.enabled=true" - publishableServiceSuffix = "-p2p" + p2pEndpointServiceSuffix = "-p2p" ) // effectiveChainID returns the chain ID for child SeiNodes, mirroring @@ -54,72 +53,72 @@ func effectiveChainID(group *seiv1alpha1.SeiNodeDeployment) string { return "" } -// publishableHostname returns "-p2p.." or "" when -// the publishable path is not configured. -func (r *SeiNodeDeploymentReconciler) publishableHostname(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { - if r.PublishableDomain == "" { +// p2pEndpointHostname returns "-p2p.." or "" when +// the P2P endpoint path is not configured. +func (r *SeiNodeDeploymentReconciler) p2pEndpointHostname(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + if r.P2PEndpointDomain == "" { return "" } chainID := effectiveChainID(group) if chainID == "" { return "" } - return fmt.Sprintf("%s-p2p.%s.%s", seiNodeName(group, ordinal), chainID, r.PublishableDomain) + return fmt.Sprintf("%s-p2p.%s.%s", seiNodeName(group, ordinal), chainID, r.P2PEndpointDomain) } -// publishableExternalAddress returns ":" or "" when the +// p2pEndpointAddress returns ":" or "" when the // hostname is unavailable. -func (r *SeiNodeDeploymentReconciler) publishableExternalAddress(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { - host := r.publishableHostname(group, ordinal) +func (r *SeiNodeDeploymentReconciler) p2pEndpointAddress(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + host := r.p2pEndpointHostname(group, ordinal) if host == "" { return "" } return fmt.Sprintf("%s:%d", host, seiconfig.PortP2P) } -func publishableServiceName(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { - return seiNodeName(group, ordinal) + publishableServiceSuffix +func p2pEndpointServiceName(group *seiv1alpha1.SeiNodeDeployment, ordinal int) string { + return seiNodeName(group, ordinal) + p2pEndpointServiceSuffix } -// reconcilePublishableServices server-side-applies one LoadBalancer Service +// reconcileP2PEndpoints server-side-applies one LoadBalancer Service // per desired ordinal and trims orphans. -func (r *SeiNodeDeploymentReconciler) reconcilePublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { +func (r *SeiNodeDeploymentReconciler) reconcileP2PEndpoints(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { desiredNames := make(map[string]struct{}, group.Spec.Replicas) for i := range int(group.Spec.Replicas) { - desired := generatePublishableService(group, i, r.publishableHostname(group, i)) + desired := generateP2PEndpointService(group, i, r.p2pEndpointHostname(group, i)) desired.SetGroupVersionKind(corev1.SchemeGroupVersion.WithKind("Service")) if err := ctrl.SetControllerReference(group, desired, r.Scheme); err != nil { - return fmt.Errorf("setting owner reference on publishable Service %s: %w", desired.Name, err) + return fmt.Errorf("setting owner reference on P2P endpoint Service %s: %w", desired.Name, err) } //nolint:staticcheck // SSA apply via untyped object mirrors the rest of this controller - if err := r.Patch(ctx, desired, client.Apply, publishableFieldOwner, client.ForceOwnership); err != nil { - return fmt.Errorf("applying publishable Service %s: %w", desired.Name, err) + if err := r.Patch(ctx, desired, client.Apply, p2pEndpointFieldOwner, client.ForceOwnership); err != nil { + return fmt.Errorf("applying P2P endpoint Service %s: %w", desired.Name, err) } desiredNames[desired.Name] = struct{}{} } - if err := r.deleteOrphanPublishableServices(ctx, group, desiredNames); err != nil { - return fmt.Errorf("trimming orphan publishable Services: %w", err) + if err := r.deleteOrphanP2PEndpoints(ctx, group, desiredNames); err != nil { + return fmt.Errorf("trimming orphan P2P endpoint Services: %w", err) } setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionTrue, - "PublishableServicesApplied", - fmt.Sprintf("%d publishable Service(s) reconciled", len(desiredNames))) + "P2PEndpointsApplied", + fmt.Sprintf("%d P2P endpoint Service(s) reconciled", len(desiredNames))) return nil } -// deleteOrphanPublishableServices removes publishable Services owned by the +// deleteOrphanP2PEndpoints removes P2P endpoint Services owned by the // SND whose names are not in desiredNames. Pass nil to delete all. -func (r *SeiNodeDeploymentReconciler) deleteOrphanPublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, desiredNames map[string]struct{}) error { +func (r *SeiNodeDeploymentReconciler) deleteOrphanP2PEndpoints(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, desiredNames map[string]struct{}) error { list := &corev1.ServiceList{} if err := r.List(ctx, list, client.InNamespace(group.Namespace), client.MatchingLabels{ groupLabel: group.Name, - publishableComponentLabel: publishableComponentValue, + p2pEndpointComponentLabel: p2pEndpointComponentValue, }, ); err != nil { - return fmt.Errorf("listing publishable Services: %w", err) + return fmt.Errorf("listing P2P endpoint Services: %w", err) } for i := range list.Items { svc := &list.Items[i] @@ -130,66 +129,66 @@ func (r *SeiNodeDeploymentReconciler) deleteOrphanPublishableServices(ctx contex continue } if err := r.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting orphan publishable Service %s: %w", svc.Name, err) + return fmt.Errorf("deleting orphan P2P endpoint Service %s: %w", svc.Name, err) } } return nil } -func (r *SeiNodeDeploymentReconciler) deletePublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { - return r.deleteOrphanPublishableServices(ctx, group, nil) +func (r *SeiNodeDeploymentReconciler) deleteP2PEndpoints(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { + return r.deleteOrphanP2PEndpoints(ctx, group, nil) } -// orphanPublishableServices drops the SND owner ref so the Services survive +// orphanP2PEndpoints drops the SND owner ref so the Services survive // SND deletion under DeletionPolicyRetain. -func (r *SeiNodeDeploymentReconciler) orphanPublishableServices(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { +func (r *SeiNodeDeploymentReconciler) orphanP2PEndpoints(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment) error { list := &corev1.ServiceList{} if err := r.List(ctx, list, client.InNamespace(group.Namespace), client.MatchingLabels{ groupLabel: group.Name, - publishableComponentLabel: publishableComponentValue, + p2pEndpointComponentLabel: p2pEndpointComponentValue, }, ); err != nil { - return fmt.Errorf("listing publishable Services for orphan: %w", err) + return fmt.Errorf("listing P2P endpoint Services for orphan: %w", err) } for i := range list.Items { if err := r.removeOwnerRef(ctx, &list.Items[i], group); err != nil { - return fmt.Errorf("orphaning publishable Service %s: %w", list.Items[i].Name, err) + return fmt.Errorf("orphaning P2P endpoint Service %s: %w", list.Items[i].Name, err) } } return nil } -// deletePublishableServiceForOrdinal removes the per-ordinal Service before +// deleteP2PEndpointForOrdinal removes the per-ordinal Service before // scaleDown deletes the child. Idempotent. -func (r *SeiNodeDeploymentReconciler) deletePublishableServiceForOrdinal(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { - name := publishableServiceName(group, ordinal) +func (r *SeiNodeDeploymentReconciler) deleteP2PEndpointForOrdinal(ctx context.Context, group *seiv1alpha1.SeiNodeDeployment, ordinal int) error { + name := p2pEndpointServiceName(group, ordinal) svc := &corev1.Service{} err := r.Get(ctx, types.NamespacedName{Name: name, Namespace: group.Namespace}, svc) if apierrors.IsNotFound(err) { return nil } if err != nil { - return fmt.Errorf("fetching publishable Service %s for delete: %w", name, err) + return fmt.Errorf("fetching P2P endpoint Service %s for delete: %w", name, err) } if !metav1.IsControlledBy(svc, group) { return nil } if err := r.Delete(ctx, svc); err != nil && !apierrors.IsNotFound(err) { - return fmt.Errorf("deleting publishable Service %s: %w", name, err) + return fmt.Errorf("deleting P2P endpoint Service %s: %w", name, err) } return nil } -func generatePublishableService(group *seiv1alpha1.SeiNodeDeployment, ordinal int, hostname string) *corev1.Service { - name := publishableServiceName(group, ordinal) +func generateP2PEndpointService(group *seiv1alpha1.SeiNodeDeployment, ordinal int, hostname string) *corev1.Service { + name := p2pEndpointServiceName(group, ordinal) childName := seiNodeName(group, ordinal) labels := map[string]string{ groupLabel: group.Name, groupOrdinalLabel: strconv.Itoa(ordinal), - publishableComponentLabel: publishableComponentValue, + p2pEndpointComponentLabel: p2pEndpointComponentValue, } annotations := map[string]string{ diff --git a/internal/controller/nodedeployment/publishable_test.go b/internal/controller/nodedeployment/p2p_endpoint_test.go similarity index 77% rename from internal/controller/nodedeployment/publishable_test.go rename to internal/controller/nodedeployment/p2p_endpoint_test.go index f80e4b63..46fce55f 100644 --- a/internal/controller/nodedeployment/publishable_test.go +++ b/internal/controller/nodedeployment/p2p_endpoint_test.go @@ -14,10 +14,10 @@ import ( const ( testChainAtlantic = "atlantic-2" - testPublishableProd = "prod.platform.sei.io" + testP2PEndpointProd = "prod.platform.sei.io" ) -func TestPublishableHostname_Table(t *testing.T) { +func TestP2PEndpointHostname_Table(t *testing.T) { t.Parallel() cases := []struct { name string @@ -25,7 +25,7 @@ func TestPublishableHostname_Table(t *testing.T) { templateChainID string genesisChainID string ordinal int - publishableDomain string + p2pEndpointDomain string want string }{ { @@ -33,7 +33,7 @@ func TestPublishableHostname_Table(t *testing.T) { sndName: testChainAtlantic, templateChainID: testChainAtlantic, ordinal: 0, - publishableDomain: testPublishableProd, + p2pEndpointDomain: testP2PEndpointProd, want: "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io", }, { @@ -41,7 +41,7 @@ func TestPublishableHostname_Table(t *testing.T) { sndName: "validators", templateChainID: testNamespace, ordinal: 5, - publishableDomain: testPublishableProd, + p2pEndpointDomain: testP2PEndpointProd, want: "validators-5-p2p.pacific-1.prod.platform.sei.io", }, { @@ -49,7 +49,7 @@ func TestPublishableHostname_Table(t *testing.T) { sndName: "newchain-validators", genesisChainID: "newchain-1", ordinal: 1, - publishableDomain: "test.platform.sei.io", + p2pEndpointDomain: "test.platform.sei.io", want: "newchain-validators-1-p2p.newchain-1.test.platform.sei.io", }, { @@ -57,14 +57,14 @@ func TestPublishableHostname_Table(t *testing.T) { sndName: testChainAtlantic, templateChainID: testChainAtlantic, ordinal: 0, - publishableDomain: "", + p2pEndpointDomain: "", want: "", }, { name: "empty chain ID returns empty", sndName: testChainAtlantic, ordinal: 0, - publishableDomain: testPublishableProd, + p2pEndpointDomain: testP2PEndpointProd, want: "", }, } @@ -83,13 +83,13 @@ func TestPublishableHostname_Table(t *testing.T) { if tc.genesisChainID != "" { snd.Spec.Genesis = &seiv1alpha1.GenesisCeremonyConfig{ChainID: tc.genesisChainID} } - r := &SeiNodeDeploymentReconciler{PublishableDomain: tc.publishableDomain} - g.Expect(r.publishableHostname(snd, tc.ordinal)).To(Equal(tc.want)) + r := &SeiNodeDeploymentReconciler{P2PEndpointDomain: tc.p2pEndpointDomain} + g.Expect(r.p2pEndpointHostname(snd, tc.ordinal)).To(Equal(tc.want)) }) } } -func TestPublishableExternalAddress_AppendsP2PPort(t *testing.T) { +func TestP2PEndpointExternalAddress_AppendsP2PPort(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic}, @@ -99,26 +99,26 @@ func TestPublishableExternalAddress_AppendsP2PPort(t *testing.T) { }, }, } - r := &SeiNodeDeploymentReconciler{PublishableDomain: testPublishableProd} - g.Expect(r.publishableExternalAddress(snd, 0)).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) + r := &SeiNodeDeploymentReconciler{P2PEndpointDomain: testP2PEndpointProd} + g.Expect(r.p2pEndpointAddress(snd, 0)).To(Equal("atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io:26656")) } -func TestPublishableExternalAddress_EmptyWhenHostnameRejected(t *testing.T) { +func TestP2PEndpointExternalAddress_EmptyWhenHostnameRejected(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic}, } - r := &SeiNodeDeploymentReconciler{PublishableDomain: testPublishableProd} - g.Expect(r.publishableExternalAddress(snd, 0)).To(BeEmpty()) + r := &SeiNodeDeploymentReconciler{P2PEndpointDomain: testP2PEndpointProd} + g.Expect(r.p2pEndpointAddress(snd, 0)).To(BeEmpty()) } -func TestPublishableServiceName(t *testing.T) { +func TestP2PEndpointServiceName(t *testing.T) { g := NewWithT(t) snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic}, } - g.Expect(publishableServiceName(snd, 0)).To(Equal("atlantic-2-0-p2p")) - g.Expect(publishableServiceName(snd, 7)).To(Equal("atlantic-2-7-p2p")) + g.Expect(p2pEndpointServiceName(snd, 0)).To(Equal("atlantic-2-0-p2p")) + g.Expect(p2pEndpointServiceName(snd, 7)).To(Equal("atlantic-2-7-p2p")) } func TestGeneratePublishableService_Annotations(t *testing.T) { @@ -126,7 +126,7 @@ func TestGeneratePublishableService_Annotations(t *testing.T) { snd := &seiv1alpha1.SeiNodeDeployment{ ObjectMeta: metav1.ObjectMeta{Name: testChainAtlantic, Namespace: "sei-test-1"}, } - svc := generatePublishableService(snd, 0, "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io") + svc := generateP2PEndpointService(snd, 0, "atlantic-2-0-p2p.atlantic-2.prod.platform.sei.io") g.Expect(svc.Annotations).To(HaveKeyWithValue( "external-dns.alpha.kubernetes.io/hostname", diff --git a/internal/controller/nodedeployment/status.go b/internal/controller/nodedeployment/status.go index 74312e33..54d2b0b8 100644 --- a/internal/controller/nodedeployment/status.go +++ b/internal/controller/nodedeployment/status.go @@ -118,13 +118,13 @@ func (r *SeiNodeDeploymentReconciler) buildNetworkingStatus(group *seiv1alpha1.S } } - if group.Spec.Networking.TCPEnabled() && r.PublishableDomain != "" { + if group.Spec.Networking.TCPEnabled() && r.P2PEndpointDomain != "" { for i := range int(group.Spec.Replicas) { - host := r.publishableExternalAddress(group, i) + host := r.p2pEndpointAddress(group, i) if host == "" { continue } - out.PublishableEndpoints = append(out.PublishableEndpoints, seiv1alpha1.PublishableEndpoint{ + out.P2PEndpoints = append(out.P2PEndpoints, seiv1alpha1.P2PEndpoint{ Ordinal: int32(i), SeiNodeName: seiNodeName(group, i), Hostname: host, @@ -132,7 +132,7 @@ func (r *SeiNodeDeploymentReconciler) buildNetworkingStatus(group *seiv1alpha1.S } } - if len(out.Routes) == 0 && len(out.PublishableEndpoints) == 0 { + if len(out.Routes) == 0 && len(out.P2PEndpoints) == 0 { return nil } return out diff --git a/manifests/sei.io_seinodedeployments.yaml b/manifests/sei.io_seinodedeployments.yaml index 75167670..56b6d451 100644 --- a/manifests/sei.io_seinodedeployments.yaml +++ b/manifests/sei.io_seinodedeployments.yaml @@ -109,7 +109,7 @@ spec: description: |- ChainID for the new network. Constrained to DNS-1123 label characters because child SeiNodes - compose it into publishable hostnames when the SND has + compose it into P2P endpoint hostnames when the SND has `spec.networking.tcp` set; the address is a one-way door once peers cache it. maxLength: 64 @@ -230,7 +230,7 @@ spec: description: |- ChainID of the chain this node belongs to. Constrained to DNS-1123 label characters because the controller composes - it into publishable hostnames (e.g. `-p2p..`) when + it into P2P endpoint hostnames (e.g. `-p2p..`) when the parent SND opts into TCP networking; the address is a one-way door once peers cache it. minLength: 1 @@ -1090,9 +1090,9 @@ spec: description: NetworkingStatus reports the observed state of networking resources. properties: - publishableEndpoints: + p2pEndpoints: description: |- - PublishableEndpoints lists the per-ordinal publishable P2P + P2PEndpoints lists the per-ordinal P2P endpoint hostnames stamped by the SND when `spec.networking.tcp` is set. Each entry mirrors the value injected into the child SeiNode's `spec.externalAddress` (hostname:port). Hostnames are deterministic @@ -1100,7 +1100,7 @@ spec: Service status back. items: description: |- - PublishableEndpoint is the observed state of one child's publishable + P2PEndpoint is the observed state of one child's publishable P2P endpoint. Stamped by the SND networking reconciler. properties: hostname: diff --git a/manifests/sei.io_seinodes.yaml b/manifests/sei.io_seinodes.yaml index 268716a0..8b2f86e2 100644 --- a/manifests/sei.io_seinodes.yaml +++ b/manifests/sei.io_seinodes.yaml @@ -91,7 +91,7 @@ spec: description: |- ChainID of the chain this node belongs to. Constrained to DNS-1123 label characters because the controller composes - it into publishable hostnames (e.g. `-p2p..`) when + it into P2P endpoint hostnames (e.g. `-p2p..`) when the parent SND opts into TCP networking; the address is a one-way door once peers cache it. minLength: 1 From 842823500f34d2358f553626b27588aaddd1fd7c Mon Sep 17 00:00:00 2001 From: bdchatham Date: Fri, 29 May 2026 10:46:52 -0700 Subject: [PATCH 6/6] fix(p2p-endpoint): set NetworkingDisabled when neither tier reconciles Cursor Bugbot finding: when `networking: { tcp: {} }` (TCP-only) and `P2PEndpointDomain` is empty, both branches in reconcileNetworking take their delete-side paths and nobody updates ConditionNetworkingReady. The condition stays stale or unset. This is the initial state of every cluster until SEI_P2P_ENDPOINT_DOMAIN is wired, so the gap fires on every reconcile of a TCP-only SND until then. Fix: track which tier actually reconciled and set `NetworkingDisabled` as a catch-all when neither did. Existing envtest extended with an assertion on the condition. Refs sei-protocol/sei-k8s-controller#365 (Cursor bug 3d2af5a2) Co-Authored-By: Claude Opus 4.7 (1M context) --- .../nodedeployment/envtest/p2p_endpoint_test.go | 6 ++++++ internal/controller/nodedeployment/networking.go | 12 ++++++++++-- 2 files changed, 16 insertions(+), 2 deletions(-) diff --git a/internal/controller/nodedeployment/envtest/p2p_endpoint_test.go b/internal/controller/nodedeployment/envtest/p2p_endpoint_test.go index e4477e04..c3e44822 100644 --- a/internal/controller/nodedeployment/envtest/p2p_endpoint_test.go +++ b/internal/controller/nodedeployment/envtest/p2p_endpoint_test.go @@ -10,6 +10,7 @@ import ( . "github.com/onsi/gomega" corev1 "k8s.io/api/core/v1" apierrors "k8s.io/apimachinery/pkg/api/errors" + apimeta "k8s.io/apimachinery/pkg/api/meta" metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" "k8s.io/apimachinery/pkg/types" "sigs.k8s.io/controller-runtime/pkg/client" @@ -397,4 +398,9 @@ func TestP2PEndpointP2P_NoDomainConfigured_SkipsServices(t *testing.T) { } return child.Spec.ExternalAddress }, 5*time.Second, 200*time.Millisecond).Should(BeEmpty()) + + waitForStatus(t, client.ObjectKeyFromObject(snd), func(latest *seiv1alpha1.SeiNodeDeployment) bool { + c := apimeta.FindStatusCondition(latest.Status.Conditions, seiv1alpha1.ConditionNetworkingReady) + return c != nil && c.Status == metav1.ConditionFalse && c.Reason == "NetworkingDisabled" + }, "ConditionNetworkingReady=False/NetworkingDisabled when neither tier active") } diff --git a/internal/controller/nodedeployment/networking.go b/internal/controller/nodedeployment/networking.go index 498b6f3f..63d1e03d 100644 --- a/internal/controller/nodedeployment/networking.go +++ b/internal/controller/nodedeployment/networking.go @@ -66,7 +66,10 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g return r.deleteNetworkingResources(ctx, group) } - if group.Spec.Networking.HTTPEnabled() { + httpActive := group.Spec.Networking.HTTPEnabled() + tcpActive := group.Spec.Networking.TCPEnabled() && r.P2PEndpointDomain != "" + + if httpActive { if err := r.reconcileExternalService(ctx, group); err != nil { return fmt.Errorf("reconciling external service: %w", err) } @@ -82,7 +85,7 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g } } - if group.Spec.Networking.TCPEnabled() && r.P2PEndpointDomain != "" { + if tcpActive { if err := r.reconcileP2PEndpoints(ctx, group); err != nil { return fmt.Errorf("reconciling P2P endpoints: %w", err) } @@ -92,6 +95,11 @@ func (r *SeiNodeDeploymentReconciler) reconcileNetworking(ctx context.Context, g } } + if !httpActive && !tcpActive { + setCondition(group, seiv1alpha1.ConditionNetworkingReady, metav1.ConditionFalse, + "NetworkingDisabled", "spec.networking has no active tier (no HTTP, and TCP requires SEI_P2P_ENDPOINT_DOMAIN)") + } + return nil }