From c1bef6a668d3f55376dec734a0cd0158ee4befea Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Fri, 17 Apr 2026 00:19:08 +0530 Subject: [PATCH 1/2] adds HCS live migration APIs Introduces the HCS live migration APIs including destination start, source-side initialize/start/transfer, finalization, and event notifications. Add supporting computecore types and HCS schema definitions for migration options and settings. Also, augments the Properties request to query for specific properties using the "Query" param. Signed-off-by: Harsh Rawat --- internal/computecore/computecore.go | 28 -- internal/computecore/types.go | 103 +++++++ internal/hcs/migration.go | 372 ++++++++++++++++++++++++ internal/hcs/schema2/migration.go | 118 ++++++++ internal/hcs/schema2/properties.go | 10 + internal/hcs/schema2/property_query.go | 4 + internal/hcs/schema2/virtual_machine.go | 2 + internal/hcs/system.go | 109 +++++-- 8 files changed, 701 insertions(+), 45 deletions(-) create mode 100644 internal/computecore/types.go create mode 100644 internal/hcs/migration.go create mode 100644 internal/hcs/schema2/migration.go diff --git a/internal/computecore/computecore.go b/internal/computecore/computecore.go index ea1071cb48..e00a287d15 100644 --- a/internal/computecore/computecore.go +++ b/internal/computecore/computecore.go @@ -94,34 +94,6 @@ import ( // errVmcomputeOperationPending is an error encountered when the operation is being completed asynchronously const errVmcomputeOperationPending = syscall.Errno(0xC0370103) -// HcsSystem is the handle associated with a created compute system. -type HcsSystem syscall.Handle - -// HcsProcess is the handle associated with a created process in a compute -// system. -type HcsProcess syscall.Handle - -// HcsOperation is the handle associated with an operation on a compute system. -type HcsOperation syscall.Handle - -// HcsCallback is the handle associated with the function to call when events -// occur. 
-type HcsCallback syscall.Handle - -// HcsProcessInformation is the structure used when creating or getting process -// info. -type HcsProcessInformation struct { - // ProcessID is the pid of the created process. - ProcessID uint32 - _ uint32 // reserved padding - // StdInput is the handle associated with the stdin of the process. - StdInput syscall.Handle - // StdOutput is the handle associated with the stdout of the process. - StdOutput syscall.Handle - // StdError is the handle associated with the stderr of the process. - StdError syscall.Handle -} - func execute(ctx gcontext.Context, timeout time.Duration, f func() error) error { now := time.Now() if timeout > 0 { diff --git a/internal/computecore/types.go b/internal/computecore/types.go new file mode 100644 index 0000000000..19b5167157 --- /dev/null +++ b/internal/computecore/types.go @@ -0,0 +1,103 @@ +//go:build windows + +package computecore + +import ( + "fmt" + "syscall" +) + +// HcsSystem is the handle associated with a created compute system. +type HcsSystem syscall.Handle + +// HcsProcess is the handle associated with a created process in a compute system. +type HcsProcess syscall.Handle + +// HcsOperation is the handle associated with an operation on a compute system. +type HcsOperation syscall.Handle + +// HcsCallback is the handle associated with the function to call when events occur. +type HcsCallback syscall.Handle + +// HcsProcessInformation is the structure used when creating or getting process info. +type HcsProcessInformation struct { + ProcessID uint32 + _ uint32 // reserved padding + StdInput syscall.Handle + StdOutput syscall.Handle + StdError syscall.Handle +} + +// HcsResourceType specifies the type of resource to add to an operation. +const ( + HcsResourceTypeNone uint32 = 0 + HcsResourceTypeFile uint32 = 1 + HcsResourceTypeJob uint32 = 2 + HcsResourceTypeComObject uint32 = 3 + HcsResourceTypeSocket uint32 = 4 +) + +// HcsEventType represents the type of event received from HCS. 
+type HcsEventType uint32 + +const ( + HcsEventTypeInvalid HcsEventType = 0x00000000 + HcsEventTypeSystemExited HcsEventType = 0x00000001 + HcsEventTypeSystemCrashInitiated HcsEventType = 0x00000002 + HcsEventTypeSystemCrashReport HcsEventType = 0x00000003 + HcsEventTypeSystemRdpEnhancedModeStateChanged HcsEventType = 0x00000004 + HcsEventTypeSystemSiloJobCreated HcsEventType = 0x00000005 + HcsEventTypeSystemGuestConnectionClosed HcsEventType = 0x00000006 + HcsEventTypeProcessExited HcsEventType = 0x00010000 + HcsEventTypeOperationCallback HcsEventType = 0x01000000 + HcsEventTypeServiceDisconnect HcsEventType = 0x02000000 + HcsEventTypeGroupVMLifecycle HcsEventType = 0x80000002 + HcsEventTypeGroupLiveMigration HcsEventType = 0x80000003 + HcsEventTypeGroupOperationInfo HcsEventType = 0xC0000001 +) + +func (t HcsEventType) String() string { + switch t { + case HcsEventTypeInvalid: + return "Invalid" + case HcsEventTypeSystemExited: + return "SystemExited" + case HcsEventTypeSystemCrashInitiated: + return "SystemCrashInitiated" + case HcsEventTypeSystemCrashReport: + return "SystemCrashReport" + case HcsEventTypeSystemRdpEnhancedModeStateChanged: + return "SystemRdpEnhancedModeStateChanged" + case HcsEventTypeSystemSiloJobCreated: + return "SystemSiloJobCreated" + case HcsEventTypeSystemGuestConnectionClosed: + return "SystemGuestConnectionClosed" + case HcsEventTypeProcessExited: + return "ProcessExited" + case HcsEventTypeOperationCallback: + return "OperationCallback" + case HcsEventTypeServiceDisconnect: + return "ServiceDisconnect" + case HcsEventTypeGroupVMLifecycle: + return "GroupVmLifecycle" + case HcsEventTypeGroupLiveMigration: + return "GroupLiveMigration" + case HcsEventTypeGroupOperationInfo: + return "GroupOperationInfo" + default: + return fmt.Sprintf("Unknown: 0x%08X", uint32(t)) + } +} + +// HcsEventOptions controls which event groups are enabled for a callback. 
+const ( + HcsEventOptionNone uint32 = 0 + HcsEventOptionEnableOperationCallbacks uint32 = 1 + HcsEventOptionEnableLiveMigrationEvents uint32 = 4 +) + +// HcsEvent is the event structure passed to HCS_EVENT_CALLBACK. +type HcsEvent struct { + Type HcsEventType + EventData *uint16 +} diff --git a/internal/hcs/migration.go b/internal/hcs/migration.go new file mode 100644 index 0000000000..198a4e618c --- /dev/null +++ b/internal/hcs/migration.go @@ -0,0 +1,372 @@ +//go:build windows + +package hcs + +import ( + "context" + "encoding/json" + "errors" + "syscall" + "unsafe" + + "github.com/Microsoft/hcsshim/internal/computecore" + hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" + "github.com/Microsoft/hcsshim/internal/oc" + + "github.com/sirupsen/logrus" + "go.opencensus.io/trace" + "golang.org/x/sys/windows" +) + +// liveMigrationSocketURI is the HCS resource URI for the live migration transport socket. +const liveMigrationSocketURI = "hcs:/VirtualMachine/LiveMigrationSocket" + +// migrationNotificationBufferSize is the capacity of the LM notification channel. +const migrationNotificationBufferSize = 16 + +// MigrationConfig holds parameters for starting a compute system as a live migration +// destination, or for initiating the source side of a live migration. +type MigrationConfig struct { + // Socket is the handle to the live migration transport socket. + Socket syscall.Handle + // SessionID identifies the migration session. + SessionID uint32 +} + +// migrationCallback is the syscall callback registered with HcsSetComputeSystemCallback +// for live migration events. It receives events and dispatches them to the channel +// stored in the System via the callbackContext pointer. +var migrationCallback = syscall.NewCallback(migrationCallbackHandler) + +// migrationCallbackHandler is invoked by computecore.dll for live migration events. 
+func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr { + if eventPtr == 0 || ctx == 0 { + return 0 + } + + e := (*computecore.HcsEvent)(unsafe.Pointer(eventPtr)) + ch := *(*chan string)(unsafe.Pointer(ctx)) + + eventData := "" + if e.EventData != nil { + eventData = windows.UTF16PtrToString(e.EventData) + } + + logrus.WithFields(logrus.Fields{ + "event-type": e.Type.String(), + "event-data": eventData, + }).Debug("HCS migration notification") + + // Non-blocking send to avoid blocking the HCS callback thread. + select { + case ch <- eventData: + default: + logrus.WithField("event-type", e.Type.String()).Warn("migration notification channel full, dropping event") + } + + return 0 +} + +// openMigrationHandle opens a second computecore handle to the same system and +// registers a callback for live migration events. It populates +// computeSystem.migrationHandle and computeSystem.migrationNotifyCh. +// +// The caller MUST hold computeSystem.handleLock. +func (computeSystem *System) openMigrationHandle(ctx context.Context) error { + if computeSystem.migrationHandle != 0 { + // Already open — idempotent. + return nil + } + + // Sanity check: the primary handle must be valid. + if computeSystem.handle == 0 { + return ErrAlreadyClosed + } + + // Open a second handle via computecore for LM operations and events. + handle, err := computecore.HcsOpenComputeSystem(ctx, computeSystem.id, syscall.GENERIC_ALL) + if err != nil { + return err + } + + // Create the notification channel and store it on the struct. + computeSystem.migrationHandle = handle + computeSystem.migrationNotifyCh = make(chan string, migrationNotificationBufferSize) + // Register the callback. 
+ if err := computecore.HcsSetComputeSystemCallback(ctx, handle, computecore.HcsEventOptionEnableLiveMigrationEvents, uintptr(unsafe.Pointer(&computeSystem.migrationNotifyCh)), migrationCallback); err != nil { + computeSystem.migrationNotifyCh = nil + computeSystem.migrationHandle = 0 + computecore.HcsCloseComputeSystem(ctx, handle) + return err + } + return nil +} + +// closeMigrationHandle unregisters the LM callback, closes the migration handle, +// and drains the notification channel. +// +// The caller MUST hold computeSystem.handleLock. +func (computeSystem *System) closeMigrationHandle(ctx context.Context) { + if computeSystem.migrationHandle == 0 { + return + } + + // Unregister callback by passing zeros. + _ = computecore.HcsSetComputeSystemCallback(ctx, computeSystem.migrationHandle, computecore.HcsEventOptionNone, 0, 0) + + // Close compute system. + computecore.HcsCloseComputeSystem(ctx, computeSystem.migrationHandle) + computeSystem.migrationHandle = 0 + + // Nullify the handle and notification channel. + if computeSystem.migrationNotifyCh != nil { + close(computeSystem.migrationNotifyCh) + computeSystem.migrationNotifyCh = nil + } +} + +// StartWithMigrationOptions synchronously starts the compute system as a live +// migration destination using the provided configuration. +func (computeSystem *System) StartWithMigrationOptions(ctx context.Context, config *MigrationConfig) (err error) { + if config == nil { + return errors.New("live migration config must not be nil") + } + + operation := "hcs::System::Start" + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + } + + // Open the migration handle for LM events and operations. 
+ if err := computeSystem.openMigrationHandle(ctx); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer func() { + if err != nil { + computeSystem.closeMigrationHandle(ctx) + } + }() + + // Create a computecore operation to track the start request. + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Attach the live migration socket to the operation. + if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, liveMigrationSocketURI, config.Socket); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + // Build start options with destination migration settings. + options := hcsschema.StartOptions{ + DestinationMigrationOptions: &hcsschema.MigrationStartOptions{ + NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID}, + }, + } + raw, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + return computeSystem.start(ctx, op, string(raw)) +} + +// InitializeLiveMigrationOnSource initializes a live migration on the source side with the given options. +func (computeSystem *System) InitializeLiveMigrationOnSource(ctx context.Context, options *hcsschema.MigrationInitializeOptions) (err error) { + operation := "hcs::System::InitializeLiveMigrationOnSource" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + // Open the migration handle for LM events and operations. 
+ if err = computeSystem.openMigrationHandle(ctx); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer func() { + if err != nil { + computeSystem.closeMigrationHandle(ctx) + } + }() + + if options == nil { + options = &hcsschema.MigrationInitializeOptions{} + } + optionsJSON, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Issue the initialize call and wait for completion. + if err = computecore.HcsInitializeLiveMigrationOnSource(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + if _, err = computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + return nil +} + +// StartLiveMigrationOnSource starts the live migration on the source side using the provided +// transport socket and session ID. 
+func (computeSystem *System) StartLiveMigrationOnSource(ctx context.Context, config *MigrationConfig) (err error) { + if config == nil { + return errors.New("migration config must not be nil") + } + + operation := "hcs::System::StartLiveMigrationOnSource" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.migrationHandle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Attach the migration socket to the operation before starting. + if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, liveMigrationSocketURI, config.Socket); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + options := hcsschema.MigrationStartOptions{ + NetworkSettings: &hcsschema.MigrationNetworkSettings{SessionID: config.SessionID}, + } + optionsJSON, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + // Issue the start call and wait for completion. + if err := computecore.HcsStartLiveMigrationOnSource(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + return nil +} + +// StartLiveMigrationTransfer starts the memory transfer phase of a live migration. 
+func (computeSystem *System) StartLiveMigrationTransfer(ctx context.Context, options *hcsschema.MigrationTransferOptions) (err error) { + operation := "hcs::System::StartLiveMigrationTransfer" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.migrationHandle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + } + + if options == nil { + options = &hcsschema.MigrationTransferOptions{} + } + optionsJSON, err := json.Marshal(options) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Begin the memory transfer and wait for completion. + if err := computecore.HcsStartLiveMigrationTransfer(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + return nil +} + +// FinalizeLiveMigration completes the live migration workflow. If resume is true the VM +// is resumed on the destination; otherwise it is stopped. 
+func (computeSystem *System) FinalizeLiveMigration(ctx context.Context, resume bool) (err error) { + operation := "hcs::System::FinalizeLiveMigration" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.Lock() + defer computeSystem.handleLock.Unlock() + + if computeSystem.migrationHandle == 0 { + return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + } + + // Choose whether to resume or stop the VM after migration. + finalOp := hcsschema.MigrationFinalOperationStop + if resume { + finalOp = hcsschema.MigrationFinalOperationResume + } + optionsJSON, err := json.Marshal(hcsschema.MigrationFinalizedOptions{FinalizedOperation: finalOp}) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + // Finalize the migration and wait for completion. + if err := computecore.HcsFinalizeLiveMigration(ctx, computeSystem.migrationHandle, op, string(optionsJSON)); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err, nil) + } + + // Migration is complete — release the migration handle and callback. + computeSystem.closeMigrationHandle(ctx) + return nil +} + +// MigrationNotifications returns a read-only channel that receives live migration +// event data strings. Returns an error if no migration handle is open. 
+func (computeSystem *System) MigrationNotifications() (<-chan string, error) { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.migrationHandle == 0 { + return nil, errors.New("migration handle not open; call StartWithMigrationOptions or InitializeLiveMigrationOnSource first") + } + return computeSystem.migrationNotifyCh, nil +} diff --git a/internal/hcs/schema2/migration.go b/internal/hcs/schema2/migration.go new file mode 100644 index 0000000000..e459ae1c18 --- /dev/null +++ b/internal/hcs/schema2/migration.go @@ -0,0 +1,118 @@ +package hcsschema + +// MigrationInitializeOptions is a set of options for the migration workflow. +type MigrationInitializeOptions struct { + // Origin is the side of migration the workflow is performed on. + Origin MigrationOrigin `json:"Origin,omitempty"` + // MemoryTransport specifies the settings for memory transfer during migration. On source, this + // setting is required when migration is started. On destination, this setting is required when + // migration is initiated. + MemoryTransport MigrationMemoryTransport `json:"MemoryTransport,omitempty"` + // MemoryTransferThrottleParams specifies settings for throttling during memory transfer. + MemoryTransferThrottleParams *MemoryMigrationTransferThrottleParams `json:"MemoryTransferThrottleParams,omitempty"` + // CompressionSettings specifies additional settings when compression is enabled. + CompressionSettings *MigrationCompressionSettings `json:"CompressionSettings,omitempty"` + // ChecksumVerification enables memory checksum verification. + ChecksumVerification bool `json:"ChecksumVerification,omitempty"` + // PerfTracingEnabled enables performance tracing during migration. + PerfTracingEnabled bool `json:"PerfTracingEnabled,omitempty"` + // CancelIfBlackoutThresholdExceeds cancels the operation if the blackout threshold is exceeded. 
+ CancelIfBlackoutThresholdExceeds bool `json:"CancelIfBlackoutThresholdExceeds,omitempty"` + // PrepareMemoryTransferMode extends timeout for cross-version live migration. + PrepareMemoryTransferMode bool `json:"PrepareMemoryTransferMode,omitempty"` + // CompatibilityData is the compatibility information required for the destination VM. + CompatibilityData *CompatibilityInfo `json:"CompatibilityData,omitempty"` +} + +// MigrationFinalizedOptions is a set of additional options used for HcsLiveMigrationFinalization. +type MigrationFinalizedOptions struct { + // Origin is the side of migration the workflow is performed on. + Origin MigrationOrigin `json:"Origin,omitempty"` + // FinalizedOperation is the final state transition for the VM as part of concluding the LM workflow. + FinalizedOperation MigrationFinalOperation `json:"FinalizedOperation,omitempty"` +} + +// MigrationStartOptions specifies options for starting a migration. +type MigrationStartOptions struct { + // NetworkSettings specifies network settings for the socket provided. + NetworkSettings *MigrationNetworkSettings `json:"NetworkSettings,omitempty"` +} + +// MigrationTransferOptions specifies options for the migration transfer phase. +type MigrationTransferOptions struct { + // Origin is the side of migration the workflow is performed on. + Origin MigrationOrigin `json:"Origin,omitempty"` +} + +// StartOptions specifies options for starting a compute system. +type StartOptions struct { + // DestinationMigrationOptions specifies settings to use when starting a migration on the destination side. + DestinationMigrationOptions *MigrationStartOptions `json:"DestinationMigrationOptions,omitempty"` +} + +// MigrationOrigin indicates where migration is initiated from. +type MigrationOrigin string + +const ( + // MigrationOriginSource indicates the source side of migration. + MigrationOriginSource MigrationOrigin = "Source" + // MigrationOriginDestination indicates the destination side of migration. 
+ MigrationOriginDestination MigrationOrigin = "Destination" +) + +// MigrationMemoryTransport is the transport protocol used for memory transfer during migration. +type MigrationMemoryTransport string + +const ( + // MigrationMemoryTransportTCP indicates the VM memory is copied over a TCP/IP connection. + MigrationMemoryTransportTCP MigrationMemoryTransport = "TCP" +) + +// MemoryMigrationTransferThrottleParams specifies settings for migration memory transfer throttling. +type MemoryMigrationTransferThrottleParams struct { + // SkipThrottling indicates whether throttling should be skipped. + SkipThrottling *bool `json:"SkipThrottling,omitempty"` + // ThrottlingScale is the scale of the throttling as a percentage (1-100). + ThrottlingScale *float64 `json:"ThrottlingScale,omitempty"` + // MinimumThrottlePercentage is the minimum percentage to which memory transfer can be throttled. + MinimumThrottlePercentage *uint8 `json:"MinimumThrottlePercentage,omitempty"` + // TargetNumberOfBrownoutTransferPasses is the number of passes targeted before the VM enters blackout. + TargetNumberOfBrownoutTransferPasses *uint32 `json:"TargetNumberOfBrownoutTransferPasses,omitempty"` + // StartingBrownoutPassNumberForThrottling is the transfer pass where throttling begins. + StartingBrownoutPassNumberForThrottling *uint32 `json:"StartingBrownoutPassNumberForThrottling,omitempty"` + // MaximumNumberOfBrownoutTransferPasses is the maximum number of passes before forcing blackout. + MaximumNumberOfBrownoutTransferPasses *uint32 `json:"MaximumNumberOfBrownoutTransferPasses,omitempty"` + // TargetBlackoutTransferTime is the expected duration for blackout transfer time. + TargetBlackoutTransferTime *uint32 `json:"TargetBlackoutTransferTime,omitempty"` + // BlackoutTimeThresholdForCancellingMigration is the blackout duration threshold for cancelling migration. 
+ BlackoutTimeThresholdForCancellingMigration *uint32 `json:"BlackoutTimeThresholdForCancellingMigration,omitempty"` +} + +// MigrationCompressionSettings specifies compression settings for migration. +type MigrationCompressionSettings struct { + // ThrottleWorkerCount is the [de]compression thread count. Values higher than what the host + // and VM configuration can support will be adjusted. The value should be non-zero. + ThrottleWorkerCount *uint32 `json:"ThrottleWorkerCount,omitempty"` +} + +// CompatibilityInfo is opaque VM compatibility data, primarily used in migration. +type CompatibilityInfo struct { + // Data is the raw compatibility information. + Data []byte `json:"Data,omitempty"` +} + +// MigrationFinalOperation is the final operation performed on the compute system to finalize the live migration workflow. +type MigrationFinalOperation string + +const ( + // MigrationFinalOperationResume resumes the VM. + MigrationFinalOperationResume MigrationFinalOperation = "Resume" + // MigrationFinalOperationStop stops the VM. + MigrationFinalOperationStop MigrationFinalOperation = "Stop" +) + +// MigrationNetworkSettings specifies the transport protocol for network connection provided by client. +type MigrationNetworkSettings struct { + // SessionID is the session ID associated with the socket connection between source and destination. + SessionID uint32 `json:"SessionId,omitempty"` +} diff --git a/internal/hcs/schema2/properties.go b/internal/hcs/schema2/properties.go index d4cb95bdde..b5ec26bbac 100644 --- a/internal/hcs/schema2/properties.go +++ b/internal/hcs/schema2/properties.go @@ -10,6 +10,8 @@ package hcsschema import ( + "encoding/json" + v1 "github.com/containerd/cgroups/v3/cgroup1/stats" ) @@ -50,7 +52,15 @@ type Properties struct { GuestConnectionInfo *GuestConnectionInfo `json:"GuestConnectionInfo,omitempty"` + // PropertyResponses maps requested property names to their associated response objects. 
+ PropertyResponses map[string]PropertyResponse `json:"PropertyResponses,omitempty"` + // Metrics is not part of the API for HCS but this is used for LCOW v2 to // return the full cgroup metrics from the guest. Metrics *v1.Metrics `json:"LCOWMetrics,omitempty"` } + +// PropertyResponse is the response object associated with a property query. +type PropertyResponse struct { + Response json.RawMessage `json:"Response,omitempty"` +} diff --git a/internal/hcs/schema2/property_query.go b/internal/hcs/schema2/property_query.go index c7ebd66092..7c06750066 100644 --- a/internal/hcs/schema2/property_query.go +++ b/internal/hcs/schema2/property_query.go @@ -12,4 +12,8 @@ package hcsschema // By default the basic properties will be returned. This query provides a way to request specific properties. type PropertyQuery struct { PropertyTypes []PropertyType `json:"PropertyTypes,omitempty"` + + // Queries is a new property request object, introduced in version 2.5, which takes the + // names of the properties and their associated query objects if needed. + Queries map[string]interface{} `json:"Queries,omitempty"` } diff --git a/internal/hcs/schema2/virtual_machine.go b/internal/hcs/schema2/virtual_machine.go index 0b66870ec6..630d1b7820 100644 --- a/internal/hcs/schema2/virtual_machine.go +++ b/internal/hcs/schema2/virtual_machine.go @@ -26,4 +26,6 @@ type VirtualMachine struct { DebugOptions *DebugOptions `json:"DebugOptions,omitempty"` GuestConnection *GuestConnection `json:"GuestConnection,omitempty"` SecuritySettings *SecuritySettings `json:"SecuritySettings,omitempty"` + // Live migration options to be used on destination. 
+ MigrationOptions *MigrationInitializeOptions `json:"MigrationOptions,omitempty"` } diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 823e27b0b7..861fec6003 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -12,6 +12,7 @@ import ( "syscall" "time" + "github.com/Microsoft/hcsshim/internal/computecore" "github.com/Microsoft/hcsshim/internal/cow" "github.com/Microsoft/hcsshim/internal/hcs/schema1" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" @@ -26,10 +27,12 @@ import ( ) type System struct { - handleLock sync.RWMutex - handle vmcompute.HcsSystem - id string - callbackNumber uintptr + handleLock sync.RWMutex + handle vmcompute.HcsSystem + migrationHandle computecore.HcsSystem + migrationNotifyCh chan string + id string + callbackNumber uintptr closedWaitOnce sync.Once waitBlock chan struct{} @@ -192,7 +195,30 @@ func GetComputeSystems(ctx context.Context, q schema1.ComputeSystemQuery) ([]sch } // Start synchronously starts the computeSystem. -func (computeSystem *System) Start(ctx context.Context) (err error) { +func (computeSystem *System) Start(ctx context.Context) error { + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return makeSystemError(computeSystem, "hcs::System::Start", ErrAlreadyClosed, nil) + } + + op, err := computecore.HcsCreateOperation(ctx, 0, 0) + if err != nil { + return makeSystemError(computeSystem, "hcs::System::Start", err, nil) + } + defer computecore.HcsCloseOperation(ctx, op) + + return computeSystem.start(ctx, op, "") +} + +// start is the shared implementation used by Start and StartWithMigrationOptions. +// The caller provides a pre-created computecore operation (with any resources already +// attached) and the JSON-encoded options string to pass to HcsStartComputeSystem. +// +// The caller MUST hold computeSystem.handleLock and verify the handle is valid +// before calling this method. 
+func (computeSystem *System) start(ctx context.Context, op computecore.HcsOperation, opts string) (err error) { operation := "hcs::System::Start" // hcsStartComputeSystemContext is an async operation. Start the outer span @@ -202,21 +228,19 @@ func (computeSystem *System) Start(ctx context.Context) (err error) { defer func() { oc.SetSpanStatus(span, err) }() span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) - computeSystem.handleLock.RLock() - defer computeSystem.handleLock.RUnlock() - - // prevent starting an exited system because waitblock we do not recreate waitBlock - // or rerun waitBackground, so we have no way to be notified of it closing again - if computeSystem.handle == 0 { - return makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + if err := computecore.HcsStartComputeSystem( + ctx, + computecore.HcsSystem(computeSystem.handle), + op, + opts, + ); err != nil { + return makeSystemError(computeSystem, operation, err, nil) } - resultJSON, err := vmcompute.HcsStartComputeSystem(ctx, computeSystem.handle, "") - events, err := processAsyncHcsResult(ctx, err, resultJSON, computeSystem.callbackNumber, - hcsNotificationSystemStartCompleted, &timeout.SystemStart) - if err != nil { - return makeSystemError(computeSystem, operation, err, events) + if _, err := computecore.HcsWaitForOperationResult(ctx, op, 0xFFFFFFFF); err != nil { + return makeSystemError(computeSystem, operation, err, nil) } + computeSystem.startTime = time.Now() return nil } @@ -574,6 +598,54 @@ func (computeSystem *System) PropertiesV2(ctx context.Context, types ...hcsschem return hcsProperties, nil } +// PropertiesV3 returns the requested compute system properties using a V2 schema property query. +// Unlike [System.PropertiesV2], this method accepts a full [hcsschema.PropertyQuery] directly, +// giving the caller more control over the query structure. The query is forwarded to HCS as-is +// without any of the in-proc optimisations applied by PropertiesV2.
+func (computeSystem *System) PropertiesV3(ctx context.Context, query *hcsschema.PropertyQuery) (_ *hcsschema.Properties, err error) { + operation := "hcs::System::PropertiesV3" + + ctx, span := oc.StartSpan(ctx, operation) + defer span.End() + defer func() { oc.SetSpanStatus(span, err) }() + span.AddAttributes(trace.StringAttribute("cid", computeSystem.id)) + + computeSystem.handleLock.RLock() + defer computeSystem.handleLock.RUnlock() + + if computeSystem.handle == 0 { + return nil, makeSystemError(computeSystem, operation, ErrAlreadyClosed, nil) + } + + log.G(ctx).WithFields(logrus.Fields{ + logfields.ContainerID: computeSystem.id, + "propertyTypes": query.PropertyTypes, + "propertyQueries": query.Queries, + }).Debug("querying compute system properties via PropertiesV3") + + queryBytes, err := json.Marshal(query) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err, nil) + } + + propertiesJSON, resultJSON, err := vmcompute.HcsGetComputeSystemProperties(ctx, computeSystem.handle, string(queryBytes)) + events := processHcsResult(ctx, resultJSON) + if err != nil { + return nil, makeSystemError(computeSystem, operation, err, events) + } + + if propertiesJSON == "" { + return nil, ErrUnexpectedValue + } + + props := &hcsschema.Properties{} + if err := json.Unmarshal([]byte(propertiesJSON), props); err != nil { + return nil, makeSystemError(computeSystem, operation, err, nil) + } + + return props, nil +} + // Pause pauses the execution of the computeSystem. This feature is not enabled in TP5. func (computeSystem *System) Pause(ctx context.Context) (err error) { operation := "hcs::System::Pause" @@ -787,6 +859,9 @@ func (computeSystem *System) CloseCtx(ctx context.Context) (err error) { close(computeSystem.waitBlock) }) + // Clean up migration handle if it was opened. 
+ computeSystem.closeMigrationHandle(ctx) + return nil } From 767fb53959fd741f637ef1757f8fefc079388cc4 Mon Sep 17 00:00:00 2001 From: Harsh Rawat Date: Tue, 21 Apr 2026 01:01:13 +0530 Subject: [PATCH 2/2] review 1 Signed-off-by: Harsh Rawat --- internal/hcs/migration.go | 44 +++++++++++++------- internal/hcs/resourcepaths/virtualmachine.go | 2 + internal/hcs/system.go | 20 ++++++--- 3 files changed, 44 insertions(+), 22 deletions(-) diff --git a/internal/hcs/migration.go b/internal/hcs/migration.go index 198a4e618c..f9597ae9ba 100644 --- a/internal/hcs/migration.go +++ b/internal/hcs/migration.go @@ -10,6 +10,7 @@ import ( "unsafe" "github.com/Microsoft/hcsshim/internal/computecore" + "github.com/Microsoft/hcsshim/internal/hcs/resourcepaths" hcsschema "github.com/Microsoft/hcsshim/internal/hcs/schema2" "github.com/Microsoft/hcsshim/internal/oc" @@ -18,9 +19,6 @@ import ( "golang.org/x/sys/windows" ) -// liveMigrationSocketURI is the HCS resource URI for the live migration transport socket. -const liveMigrationSocketURI = "hcs:/VirtualMachine/LiveMigrationSocket" - // migrationNotificationBufferSize is the capacity of the LM notification channel. const migrationNotificationBufferSize = 16 @@ -39,6 +37,11 @@ type MigrationConfig struct { var migrationCallback = syscall.NewCallback(migrationCallbackHandler) // migrationCallbackHandler is invoked by computecore.dll for live migration events. +// ctx is &computeSystem.migrationNotifyCh, kept alive across the cgo boundary by +// computeSystem.migrationPinner (unpinned only after HcsCloseComputeSystem has +// drained any in-flight callbacks). The notification channel is never closed. +// Skipping the close keeps tear-down trivially safe and removes the only +// thing that could turn a channel send into a panic. 
func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr { if eventPtr == 0 || ctx == 0 { return 0 @@ -67,7 +70,7 @@ func migrationCallbackHandler(eventPtr uintptr, ctx uintptr) uintptr { return 0 } -// openMigrationHandle opens a second computecore handle to the same system and +// openMigrationHandle opens a computecore handle to the same system and // registers a callback for live migration events. It populates // computeSystem.migrationHandle and computeSystem.migrationNotifyCh. // @@ -92,8 +95,15 @@ func (computeSystem *System) openMigrationHandle(ctx context.Context) error { // Create the notification channel and store it on the struct. computeSystem.migrationHandle = handle computeSystem.migrationNotifyCh = make(chan string, migrationNotificationBufferSize) + + // Pin the address of the notification channel field so it stays visible + // to the GC while HCS holds it as a uintptr callback context. Without + // pinning, this would violate cgo's pointer-passing rules. + computeSystem.migrationPinner.Pin(&computeSystem.migrationNotifyCh) + // Register the callback. if err := computecore.HcsSetComputeSystemCallback(ctx, handle, computecore.HcsEventOptionEnableLiveMigrationEvents, uintptr(unsafe.Pointer(&computeSystem.migrationNotifyCh)), migrationCallback); err != nil { + computeSystem.migrationPinner.Unpin() computeSystem.migrationNotifyCh = nil computeSystem.migrationHandle = 0 computecore.HcsCloseComputeSystem(ctx, handle) @@ -102,8 +112,8 @@ func (computeSystem *System) openMigrationHandle(ctx context.Context) error { return nil } -// closeMigrationHandle unregisters the LM callback, closes the migration handle, -// and drains the notification channel. +// closeMigrationHandle unregisters the LM callback and closes the migration +// handle. // // The caller MUST hold computeSystem.handleLock. 
func (computeSystem *System) closeMigrationHandle(ctx context.Context) { @@ -111,18 +121,20 @@ func (computeSystem *System) closeMigrationHandle(ctx context.Context) { return } - // Unregister callback by passing zeros. + // Unregister callback by passing zeros, then close the compute system. + // HcsCloseComputeSystem waits for any in-flight callbacks to return, so + // after it completes no callback can still be reading the pinned + // channel pointer and it is safe to Unpin. _ = computecore.HcsSetComputeSystemCallback(ctx, computeSystem.migrationHandle, computecore.HcsEventOptionNone, 0, 0) - - // Close compute system. computecore.HcsCloseComputeSystem(ctx, computeSystem.migrationHandle) computeSystem.migrationHandle = 0 - // Nullify the handle and notification channel. - if computeSystem.migrationNotifyCh != nil { - close(computeSystem.migrationNotifyCh) - computeSystem.migrationNotifyCh = nil - } + computeSystem.migrationPinner.Unpin() + + // Drop the channel reference. The channel is intentionally not closed: + // consumers signal end-of-stream via the System's context, so a close + // would add no information and would only complicate tear-down. + computeSystem.migrationNotifyCh = nil } // StartWithMigrationOptions synchronously starts the compute system as a live @@ -159,7 +171,7 @@ func (computeSystem *System) StartWithMigrationOptions(ctx context.Context, conf defer computecore.HcsCloseOperation(ctx, op) // Attach the live migration socket to the operation. 
- if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, liveMigrationSocketURI, config.Socket); err != nil { + if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, resourcepaths.LiveMigrationSocketURI, config.Socket); err != nil { return makeSystemError(computeSystem, operation, err, nil) } @@ -251,7 +263,7 @@ func (computeSystem *System) StartLiveMigrationOnSource(ctx context.Context, con defer computecore.HcsCloseOperation(ctx, op) // Attach the migration socket to the operation before starting. - if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, liveMigrationSocketURI, config.Socket); err != nil { + if err := computecore.HcsAddResourceToOperation(ctx, op, computecore.HcsResourceTypeSocket, resourcepaths.LiveMigrationSocketURI, config.Socket); err != nil { return makeSystemError(computeSystem, operation, err, nil) } diff --git a/internal/hcs/resourcepaths/virtualmachine.go b/internal/hcs/resourcepaths/virtualmachine.go index 328077288e..d8803f29cb 100644 --- a/internal/hcs/resourcepaths/virtualmachine.go +++ b/internal/hcs/resourcepaths/virtualmachine.go @@ -20,4 +20,6 @@ const ( VPMemDeviceResourceFormat string = "VirtualMachine/Devices/VirtualPMem/Devices/%d/Mappings/%d" VSMBShareResourcePath string = "VirtualMachine/Devices/VirtualSmb/Shares" HvSocketConfigResourceFormat string = "VirtualMachine/Devices/HvSocket/HvSocketConfig/ServiceTable/%s" + // LiveMigrationSocketURI is the HCS resource URI for the live migration transport socket. 
+ LiveMigrationSocketURI string = "hcs:/VirtualMachine/LiveMigrationSocket" ) diff --git a/internal/hcs/system.go b/internal/hcs/system.go index 861fec6003..762b49786a 100644 --- a/internal/hcs/system.go +++ b/internal/hcs/system.go @@ -7,6 +7,7 @@ import ( "encoding/json" "errors" "fmt" + "runtime" "strings" "sync" "syscall" @@ -27,12 +28,10 @@ import ( ) type System struct { - handleLock sync.RWMutex - handle vmcompute.HcsSystem - migrationHandle computecore.HcsSystem - migrationNotifyCh chan string - id string - callbackNumber uintptr + handleLock sync.RWMutex + handle vmcompute.HcsSystem + id string + callbackNumber uintptr closedWaitOnce sync.Once waitBlock chan struct{} @@ -41,6 +40,15 @@ type System struct { os, typ, owner string startTime time.Time stopTime time.Time + + // Live Migration specific fields. + migrationHandle computecore.HcsSystem + migrationNotifyCh chan string + // migrationPinner pins &migrationNotifyCh while it is registered as the + // callback context with HCS, so the GC sees the cgo-held uintptr as a + // live reference. Unpinned in closeMigrationHandle after HCS guarantees + // no further callbacks will fire. + migrationPinner runtime.Pinner } var _ cow.Container = &System{}