Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
62 changes: 55 additions & 7 deletions cmd/api/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -193,13 +193,27 @@ type CapacityConfig struct {

// HypervisorConfig holds hypervisor settings. Each field is populated from the
// configuration key named in its koanf tag, nested under the hypervisor section
// (the validation errors in this file refer to them as "hypervisor.<key>").
type HypervisorConfig struct {
Default string `koanf:"default"`
CloudHypervisorDefaultVersion string `koanf:"cloud_hypervisor_default_version"`
FirecrackerBinaryPath string `koanf:"firecracker_binary_path"`
FirecrackerSnapshotMemoryBackend string `koanf:"firecracker_snapshot_memory_backend"`
FirecrackerUFFDCacheMaxBytes string `koanf:"firecracker_uffd_cache_max_bytes"`
FirecrackerMaxConcurrentRestores int `koanf:"firecracker_max_concurrent_restores"`
Memory HypervisorMemoryConfig `koanf:"memory"`
Default string `koanf:"default"`
CloudHypervisorDefaultVersion string `koanf:"cloud_hypervisor_default_version"`
FirecrackerBinaryPath string `koanf:"firecracker_binary_path"`
FirecrackerSnapshotMemoryBackend string `koanf:"firecracker_snapshot_memory_backend"`
// FirecrackerUFFDCacheMaxBytes is a byte size kept as a string; Validate
// checks it with validateByteSize.
FirecrackerUFFDCacheMaxBytes string `koanf:"firecracker_uffd_cache_max_bytes"`
FirecrackerMaxConcurrentRestores int `koanf:"firecracker_max_concurrent_restores"`
// FirecrackerUFFDGraduation configures the background UFFD graduation
// controller; see FirecrackerUFFDGraduationConfig.
FirecrackerUFFDGraduation FirecrackerUFFDGraduationConfig `koanf:"firecracker_uffd_graduation"`
Memory HypervisorMemoryConfig `koanf:"memory"`
}

// FirecrackerUFFDGraduationConfig controls the background controller that
// detaches running UFFD-backed VMs from their snapshot memory pager once they
// have soaked, bounding active pager sessions and letting old pager versions
// retire. Disabled by default and only active on the uffd backend.
//
// The duration fields are strings in Go duration syntax (for example "10m");
// they are parsed with time.ParseDuration when the controller is configured.
type FirecrackerUFFDGraduationConfig struct {
// Enabled turns the controller on. While false, the other fields are not
// validated and no controller is constructed. Defaults to false.
Enabled bool `koanf:"enabled"`
// MinSessionAge is a duration string, default "10m".
// NOTE(review): presumably the soak time a pager session must reach before
// its VM is eligible for graduation — confirm against the controller.
MinSessionAge string `koanf:"min_session_age"`
// MaxConcurrent must not be negative; default 1.
// NOTE(review): presumably caps graduations in flight at once — confirm.
MaxConcurrent int `koanf:"max_concurrent"`
// MaxActiveSessions must not be negative; default 0.
// NOTE(review): the meaning of 0 (no limit vs. none allowed) is not visible
// here — confirm against the controller.
MaxActiveSessions int `koanf:"max_active_sessions"`
// ScanInterval is a duration string, default "1m".
ScanInterval string `koanf:"scan_interval"`
// CompletionTimeout is a duration string, default "10m".
CompletionTimeout string `koanf:"completion_timeout"`
}

// HypervisorMemoryConfig holds guest memory management settings.
Expand Down Expand Up @@ -413,6 +427,14 @@ func defaultConfig() *Config {
FirecrackerSnapshotMemoryBackend: "file",
FirecrackerUFFDCacheMaxBytes: "4294967296",
FirecrackerMaxConcurrentRestores: 32,
FirecrackerUFFDGraduation: FirecrackerUFFDGraduationConfig{
Enabled: false,
MinSessionAge: "10m",
MaxConcurrent: 1,
MaxActiveSessions: 0,
ScanInterval: "1m",
CompletionTimeout: "10m",
},
Memory: HypervisorMemoryConfig{
Enabled: false,
KernelPageInitMode: "hardened",
Expand Down Expand Up @@ -640,6 +662,9 @@ func (c *Config) Validate() error {
if err := validateByteSize("hypervisor.firecracker_uffd_cache_max_bytes", c.Hypervisor.FirecrackerUFFDCacheMaxBytes); err != nil {
return err
}
if err := c.validateFirecrackerUFFDGraduation(); err != nil {
return err
}
if err := validateDuration("hypervisor.memory.active_ballooning.poll_interval", c.Hypervisor.Memory.ActiveBallooning.PollInterval); err != nil {
return err
}
Expand Down Expand Up @@ -692,6 +717,29 @@ func validateDuration(field string, value string) error {
return nil
}

// validateFirecrackerUFFDGraduation checks the
// hypervisor.firecracker_uffd_graduation settings.
//
// Validation is skipped entirely while the feature is disabled, so unused
// values never block startup. When enabled, the three duration fields must pass
// validateDuration and the two counters must not be negative. The first failing
// check is returned; checks always run in the order listed below, so the
// reported field is stable when several values are invalid.
func (c *Config) validateFirecrackerUFFDGraduation() error {
	g := c.Hypervisor.FirecrackerUFFDGraduation
	if !g.Enabled {
		return nil
	}

	// An ordered slice, not a map: Go randomizes map iteration order, which
	// would make the reported field vary between runs when more than one
	// duration is invalid.
	durations := []struct {
		field string
		value string
	}{
		{"hypervisor.firecracker_uffd_graduation.min_session_age", g.MinSessionAge},
		{"hypervisor.firecracker_uffd_graduation.scan_interval", g.ScanInterval},
		{"hypervisor.firecracker_uffd_graduation.completion_timeout", g.CompletionTimeout},
	}
	for _, d := range durations {
		if err := validateDuration(d.field, d.value); err != nil {
			return err
		}
	}

	if g.MaxConcurrent < 0 {
		return fmt.Errorf("hypervisor.firecracker_uffd_graduation.max_concurrent must not be negative")
	}
	if g.MaxActiveSessions < 0 {
		return fmt.Errorf("hypervisor.firecracker_uffd_graduation.max_active_sessions must not be negative")
	}
	return nil
}

// intPtr returns a pointer to a newly allocated int holding v. Each call yields
// a distinct pointer, so callers may mutate the result independently.
func intPtr(v int) *int {
	p := new(int)
	*p = v
	return p
}
40 changes: 40 additions & 0 deletions cmd/api/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,8 +35,10 @@ import (
"github.com/kernel/hypeman/lib/ocicachegc"
"github.com/kernel/hypeman/lib/otel"
"github.com/kernel/hypeman/lib/paths"
"github.com/kernel/hypeman/lib/providers"
"github.com/kernel/hypeman/lib/registry"
"github.com/kernel/hypeman/lib/scopes"
"github.com/kernel/hypeman/lib/uffdgraduate"
"github.com/kernel/hypeman/lib/vmm"
nethttpmiddleware "github.com/oapi-codegen/nethttp-middleware"
"github.com/riandyrn/otelchi"
Expand Down Expand Up @@ -131,6 +133,33 @@ func startOCICacheGC(grp *errgroup.Group, ctx context.Context, runner ociCacheGC
return true
}

// configureUFFDGraduationController builds the UFFD graduation controller from
// cfg.Hypervisor.FirecrackerUFFDGraduation.
//
// It returns (nil, nil) when the feature is disabled, so callers must nil-check
// the controller before running it. When enabled, the three duration strings
// are parsed in order (min_session_age, scan_interval, completion_timeout) and
// the first one that fails to parse is reported with its config key and value.
func configureUFFDGraduationController(cfg *config.Config, instanceManager instances.Manager, logger *slog.Logger) (*uffdgraduate.Controller, error) {
	settings := cfg.Hypervisor.FirecrackerUFFDGraduation
	if !settings.Enabled {
		return nil, nil
	}

	// Start from the fields that need no conversion, then fill in each parsed
	// duration directly.
	controllerCfg := uffdgraduate.Config{
		Enabled:           true,
		MaxConcurrent:     settings.MaxConcurrent,
		MaxActiveSessions: settings.MaxActiveSessions,
	}

	var err error
	if controllerCfg.MinSessionAge, err = time.ParseDuration(settings.MinSessionAge); err != nil {
		return nil, fmt.Errorf("invalid hypervisor.firecracker_uffd_graduation.min_session_age %q: %w", settings.MinSessionAge, err)
	}
	if controllerCfg.ScanInterval, err = time.ParseDuration(settings.ScanInterval); err != nil {
		return nil, fmt.Errorf("invalid hypervisor.firecracker_uffd_graduation.scan_interval %q: %w", settings.ScanInterval, err)
	}
	if controllerCfg.CompletionTimeout, err = time.ParseDuration(settings.CompletionTimeout); err != nil {
		return nil, fmt.Errorf("invalid hypervisor.firecracker_uffd_graduation.completion_timeout %q: %w", settings.CompletionTimeout, err)
	}

	return providers.ProvideUFFDGraduationController(instanceManager, controllerCfg, logger), nil
}

func run() error {
// Load config early for OTel initialization
// Config path can be specified via CONFIG_PATH env var or defaults to platform-specific locations
Expand Down Expand Up @@ -565,6 +594,17 @@ func run() error {
return app.AutoStandbyController.Run(gctx)
})
}

uffdGraduationController, err := configureUFFDGraduationController(app.Config, app.InstanceManager, logger)
if err != nil {
return err
}
if uffdGraduationController != nil {
grp.Go(func() error {
logger.Info("starting uffd graduation controller")
return uffdGraduationController.Run(gctx)
})
}
if app.HealthCheckController != nil {
grp.Go(func() error {
logger.Info("starting health check controller")
Expand Down
21 changes: 11 additions & 10 deletions lib/hypervisor/firecracker/firecracker.go
Original file line number Diff line number Diff line change
Expand Up @@ -57,16 +57,17 @@ func (f *Firecracker) Capabilities() hypervisor.Capabilities {

// capabilities returns the fixed hypervisor.Capabilities value for this
// package's hypervisor. Every flag is a literal, so the result does not depend
// on any runtime state.
func capabilities() hypervisor.Capabilities {
return hypervisor.Capabilities{
SupportsSnapshot: true,
SupportsHotplugMemory: false,
SupportsBalloonControl: true,
SupportsPause: true,
SupportsVsock: true,
SupportsGPUPassthrough: false,
SupportsDiskIOLimit: true,
SupportsGracefulVMMShutdown: false,
SupportsSnapshotBaseReuse: true,
SupportsConcurrentForkPrepare: true,
SupportsSnapshot: true,
SupportsHotplugMemory: false,
SupportsBalloonControl: true,
SupportsPause: true,
SupportsVsock: true,
SupportsGPUPassthrough: false,
SupportsDiskIOLimit: true,
SupportsGracefulVMMShutdown: false,
SupportsSnapshotBaseReuse: true,
SupportsConcurrentForkPrepare: true,
// Restores may be backed by an external snapshot-memory pager that a
// running VM can later be detached from.
UsesDetachableSnapshotMemoryPager: true,
}
}

Expand Down
5 changes: 5 additions & 0 deletions lib/hypervisor/hypervisor.go
Original file line number Diff line number Diff line change
Expand Up @@ -263,6 +263,11 @@ type Capabilities struct {
// SupportsDiskResize indicates if live disk resizing (/vm.resize-disk) is available.
// Cloud Hypervisor v50.0+ only.
SupportsDiskResize bool

// UsesDetachableSnapshotMemoryPager indicates restores can be backed by an
// external snapshot-memory pager that a running VM can later be detached
// from (populate remaining pages, then release the session).
UsesDetachableSnapshotMemoryPager bool
}

// VsockDialer provides vsock connectivity to a guest VM.
Expand Down
153 changes: 153 additions & 0 deletions lib/instances/firecracker_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -861,6 +861,159 @@ func TestFCUFFDOneShotLifecycle(t *testing.T) {
snapshotDeleted = true
}

// TestFCUFFDGraduationLifecycle exercises detaching a running UFFD-backed VM
// from its pager: the pager populates the remaining pages and unregisters the
// session, and the VM must keep running on resident memory with its guest state
// intact. It is a sibling of TestFCUFFDOneShotLifecycle and leaves that test's
// coverage unchanged.
//
// The test is skipped unless the Firecracker and userfaultfd prerequisites are
// met and HYPEMAN_UFFD_PAGER_BINARY names a regular file.
func TestFCUFFDGraduationLifecycle(t *testing.T) {
// Intentionally not parallel: graduation forces a full guest-memory populate,
// and overlapping that with the sibling UFFD lifecycle test's VMs saturated
// the CI runner and timed out guest-agent readiness. Running solo keeps peak
// concurrent UFFD VM load the same as before this test existed.
requireFirecrackerIntegrationPrereqs(t)
requireUserfaultfdIntegrationPrereqs(t)
if pagerBinary := strings.TrimSpace(os.Getenv("HYPEMAN_UFFD_PAGER_BINARY")); pagerBinary == "" {
t.Skip("HYPEMAN_UFFD_PAGER_BINARY must point at hypeman-uffd-pager for UFFD integration tests")
} else if st, err := os.Stat(pagerBinary); err != nil || !st.Mode().IsRegular() {
t.Skipf("HYPEMAN_UFFD_PAGER_BINARY is not a regular file: %s", pagerBinary)
}

// Manager under test uses the UFFD snapshot-memory backend with the UFFD
// cache capped at 512 MiB (512 << 20 bytes).
mgr, tmpDir := setupTestManagerForFirecrackerWithConfig(t, legacyParallelTestNetworkConfig(testNetworkSeq.Add(1)), ManagerConfig{
FirecrackerSnapshotMemoryBackend: uffdpager.BackendUFFD,
FirecrackerUFFDCacheMaxBytes: 512 << 20,
})
ctx := context.Background()
p := paths.New(tmpDir)

imageManager, err := images.NewManager(p, 1, nil)
require.NoError(t, err)
imageName := integrationTestImageRef(t, "docker.io/library/alpine:latest")
snapshottest.EnsureImageReady(t, ctx, p, imageManager, imageName)

systemManager := system.NewManager(p)
require.NoError(t, systemManager.EnsureSystemFiles(ctx))

// Source VM: 1 GiB Size and OverlaySize, one vCPU, no networking, running
// "sleep infinity" so it stays up for the whole test.
source, err := mgr.CreateInstance(ctx, CreateInstanceRequest{
Name: "fc-uffd-grad-src",
Image: imageName,
Size: 1024 * 1024 * 1024,
OverlaySize: 1024 * 1024 * 1024,
Vcpus: 1,
NetworkEnabled: false,
Hypervisor: hypervisor.TypeFirecracker,
Cmd: []string{"sleep", "infinity"},
})
require.NoError(t, err)
sourceID := source.Id
// Best-effort fallback cleanup; the flag is set once the explicit delete at
// the end of the test has succeeded, so the instance is not deleted twice.
sourceDeleted := false
t.Cleanup(func() {
if !sourceDeleted {
_ = mgr.DeleteInstance(context.Background(), sourceID)
}
})

source = requireRunningSleepInstance(t, ctx, mgr, sourceID)
requireGuestTmpfs(t, ctx, source)
// One marker under /root and one under /dev/shm.
// NOTE(review): presumably /root is disk-backed and /dev/shm is the tmpfs
// checked above, so the pair covers disk and guest memory — confirm.
writeGuestFile(t, ctx, source, "/root/uffd-grad/source", "source-disk")
writeGuestFile(t, ctx, source, "/dev/shm/uffd-grad/source", "source-memory")

// A VM with no pager session (the freshly created, file-backed source) is a
// no-op to graduate.
require.NoError(t, mgr.GraduateSnapshotMemoryPager(ctx, sourceID))

snapshot, err := mgr.CreateSnapshot(ctx, sourceID, CreateSnapshotRequest{
Kind: SnapshotKindStandby,
Name: "fc-uffd-grad-snap",
})
require.NoError(t, err)
// Same best-effort fallback pattern as for the source instance.
snapshotDeleted := false
t.Cleanup(func() {
if !snapshotDeleted {
_ = mgr.DeleteSnapshot(context.Background(), snapshot.Id)
}
})

// Forking the standby snapshot to a running VM restores it UFFD-backed and
// pins a live pager session.
parent, err := mgr.ForkSnapshot(ctx, snapshot.Id, ForkSnapshotRequest{
Name: "fc-uffd-grad-parent",
TargetState: StateRunning,
})
require.NoError(t, err)
parentID := parent.Id
// Same best-effort fallback pattern as for the source instance.
parentDeleted := false
t.Cleanup(func() {
if !parentDeleted {
_ = mgr.DeleteInstance(context.Background(), parentID)
}
})

// The fork must see the source's markers; it then writes its own so later
// checks can tell pre-fork and post-fork state apart.
parent = requireRunningSleepInstance(t, ctx, mgr, parentID)
assertGuestFile(t, ctx, parent, "/root/uffd-grad/source", "source-disk")
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/source", "source-memory")
writeGuestFile(t, ctx, parent, "/root/uffd-grad/parent", "parent-disk")
writeGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/parent", "parent-memory")

// Before graduation the stored metadata must record a pager session, and its
// pager version must equal the manager's current graduation target version.
parentMeta, err := mgr.loadMetadata(parentID)
require.NoError(t, err)
require.NotEmpty(t, parentMeta.StoredMetadata.FirecrackerUFFDSessionID, "running UFFD fork should hold a pager session")
target := mgr.UFFDGraduationTargetVersion()
require.NotEmpty(t, target, "uffd backend should expose a target pager version")
require.Equal(t, target, parentMeta.StoredMetadata.FirecrackerUFFDPagerVersion)

// Graduate: the pager fully populates memory from the backing file and
// unregisters the session. The VM keeps running with no pager dependency.
require.NoError(t, mgr.GraduateSnapshotMemoryPager(ctx, parentID))

// Metadata is reloaded from the manager to observe the persisted result.
parentMeta, err = mgr.loadMetadata(parentID)
require.NoError(t, err)
require.Empty(t, parentMeta.StoredMetadata.FirecrackerUFFDSessionID, "graduation should clear the pager session binding")
require.Empty(t, parentMeta.StoredMetadata.FirecrackerUFFDPagerVersion)

// The VM is still running and all guest memory and disk content survived the
// populate + unregister.
parent = requireRunningSleepInstance(t, ctx, mgr, parentID)
assertGuestFile(t, ctx, parent, "/root/uffd-grad/source", "source-disk")
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/source", "source-memory")
assertGuestFile(t, ctx, parent, "/root/uffd-grad/parent", "parent-disk")
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/parent", "parent-memory")

// New guest memory and disk writes still work, proving the guest did not hang
// on a previously untouched page after userfaultfd was unregistered.
writeGuestFile(t, ctx, parent, "/root/uffd-grad/post", "post-disk")
writeGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/post", "post-memory")
assertGuestFile(t, ctx, parent, "/root/uffd-grad/post", "post-disk")
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/post", "post-memory")

// Graduating again is a no-op now that the session is gone.
require.NoError(t, mgr.GraduateSnapshotMemoryPager(ctx, parentID))

// A graduated VM still standbys and restores via the file backend, and its
// memory survives the round trip.
parent, err = mgr.StandbyInstance(ctx, parentID, StandbyInstanceRequest{})
require.NoError(t, err)
require.Equal(t, StateStandby, parent.State)

parent, err = mgr.RestoreInstance(ctx, parentID)
require.NoError(t, err)
parent = requireRunningSleepInstance(t, ctx, mgr, parentID)
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/source", "source-memory")
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/parent", "parent-memory")
assertGuestFile(t, ctx, parent, "/dev/shm/uffd-grad/post", "post-memory")

parentMeta, err = mgr.loadMetadata(parentID)
require.NoError(t, err)
require.Empty(t, parentMeta.StoredMetadata.FirecrackerUFFDSessionID, "file-backed restore after graduation should not create a pager session")

// Explicit teardown: deletions must succeed, and each flag disarms the
// matching t.Cleanup fallback registered above.
require.NoError(t, mgr.DeleteInstance(ctx, parentID))
parentDeleted = true
require.NoError(t, mgr.DeleteInstance(ctx, sourceID))
sourceDeleted = true
require.NoError(t, mgr.DeleteSnapshot(ctx, snapshot.Id))
snapshotDeleted = true
}

func requireRunningSleepInstance(t *testing.T, ctx context.Context, mgr Manager, instanceID string) *Instance {
t.Helper()
inst, err := waitForInstanceState(ctx, mgr, instanceID, StateRunning, integrationTestTimeout(20*time.Second))
Expand Down
Loading
Loading