From d50adbf6ab630727d93161a93bc3b23bb6d65e6a Mon Sep 17 00:00:00 2001
From: Zican Li
Date: Tue, 21 Apr 2026 22:56:38 +0000
Subject: [PATCH] Update inference operator CRDs and Helm chart to v3.1

Sync CRDs, Chart.yaml, and values.yaml from internal inference operator repo.

New features:
- Data capture: 3-tier capture (SageMaker endpoint, LoadBalancer, Model Pod)
- DNS automation: Route53 dnsConfig and dnsStatus
- HuggingFace model source with token secret reference
- Kubernetes volume model source type
- ServiceAccount support for inference pods (IRSA)
- SageMaker endpoint registration: data capture config and user-defined tags
- APS workspace URL pattern fix
- Init container image configuration in values.yaml

Note: pdSpec (disaggregated prefill-decode) excluded, not yet released.

Signed-off-by: Zican Li
---
 .../charts/inference-operator/Chart.yaml | 4 +-
 ...s.amazon.com_inferenceendpointconfigs.yaml | 307 +++++++++++++++++-
 ...emaker.aws.amazon.com_jumpstartmodels.yaml | 257 ++++++++++++++-
 ...on.com_sagemakerendpointregistrations.yaml | 103 ++++++
 .../charts/inference-operator/values.yaml | 3 +
 5 files changed, 660 insertions(+), 14 deletions(-)

diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml
index d753d900..3600ea61 100644
--- a/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml
+++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/Chart.yaml
@@ -15,7 +15,7 @@ type: application
 # This is the chart version. This version number should be incremented each time you make changes
 # to the chart and its templates, including the app version.
 # Versions are expected to follow Semantic Versioning (https://semver.org/)
-version: 2.1.0
+version: 2.2.0
 
 # This is the version number of the application being deployed. Keep this aligned
 # with operator image MAJOR.MINOR version.
@@ -50,4 +50,4 @@ dependencies: alias: keda version: 2.17.1 repository: "https://kedacore.github.io/charts" - condition: keda.enabled + condition: keda.enabled \ No newline at end of file diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml index a99280a3..f9e1166d 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_inferenceendpointconfigs.yaml @@ -270,7 +270,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -315,7 +315,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -343,6 +343,211 @@ spec: minimum: 0 type: integer type: object + dataCapture: + description: Configuration for data capture across multiple tiers + (SageMaker, LoadBalancer, Model Pod) + properties: + loadBalancer: + description: Configuration for LoadBalancer level data capture + (Tier 2) + properties: + enabled: + description: Enable or disable load balancer 
access logs + type: boolean + required: + - enabled + type: object + modelPod: + description: Configuration for Model Pod level data capture (Tier + 3) + properties: + bufferConfig: + description: Configuration for buffering and flushing captured + data + properties: + batchSize: + default: 10 + description: Number of records to batch before writing + to S3 + format: int32 + maximum: 1000 + minimum: 1 + type: integer + flushIntervalSeconds: + default: 60 + description: Flush interval in seconds + format: int32 + maximum: 300 + minimum: 10 + type: integer + type: object + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable model pod data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). 
Defaults + to 100 when enabled. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + payloadConfig: + description: Configuration for payload size limits + properties: + maxPayloadSizeKB: + default: 0 + description: Maximum payload size in KB to capture. 0 + means no limit (capture full payload). + format: int32 + maximum: 10240 + minimum: 0 + type: integer + type: object + required: + - enabled + type: object + s3Uri: + description: |- + Common S3 URI for all data capture tiers. Each tier will write to a specific prefix within this bucket. + Must use s3:// protocol (required by ALB access logs). + If not provided, the TLS certificate bucket will be used for data capture storage. + maxLength: 512 + pattern: ^s3://([^/]+)(/[^,=]*)?$ + type: string + sagemakerEndpoint: + description: Configuration for SageMaker Endpoint level data capture + (Tier 1) + properties: + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). 
Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable SageMaker endpoint data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + required: + - enabled + type: object + type: object + dnsConfig: + description: DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig + to be set. + properties: + hostedZoneId: + description: Route53 Hosted Zone ID where the DNS record will + be created. 
+ pattern: ^Z[A-Z0-9]+$ + type: string + required: + - hostedZoneId + type: object endpointName: description: |- Name used for Sagemaker Endpoint @@ -589,7 +794,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -636,7 +841,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -2230,6 +2435,14 @@ spec: maxLength: 253 pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ type: string + serviceAccountName: + description: |- + Name of the Kubernetes ServiceAccount to use for the inference pod. + If not specified, the namespace's default service account will be used. + This is useful for providing AWS credentials via IRSA to init containers or the worker. + maxLength: 253 + pattern: ^[a-z0-9]([-a-z0-9]*[a-z0-9])?(\.[a-z0-9]([-a-z0-9]*[a-z0-9])?)*$ + type: string volumes: description: |- Additional volumes to add to the pod spec. @@ -4249,6 +4462,50 @@ spec: required: - fileSystemId type: object + huggingFaceModel: + description: HuggingFace model configuration. Required when modelSourceType + is "huggingface". + properties: + commitSHA: + description: |- + Git commit SHA for the model revision. Must be a full 40-character lowercase hex SHA. + If not provided, the operator defaults to "main" branch. 
+ pattern: ^[0-9a-f]{40}$ + type: string + modelId: + description: HuggingFace Hub model identifier in org/model + format (e.g. "meta-llama/Llama-3.1-8B-Instruct"). + pattern: ^[a-zA-Z0-9_.-]+/[a-zA-Z0-9_.-]+$ + type: string + tokenSecretRef: + description: |- + Reference to a Kubernetes Secret containing the HuggingFace API token. + The token is injected as the HF_TOKEN environment variable into the InitContainer only. + properties: + key: + description: The key of the secret to select from. Must + be a valid secret key. + type: string + name: + default: "" + description: |- + Name of the referent. + This field is effectively required, but due to backwards compatibility is + allowed to be empty. Instances of this type with an empty value here are + almost certainly wrong. + More info: https://kubernetes.io/docs/concepts/overview/working-with-objects/names/#names + type: string + optional: + description: Specify whether the Secret or its key must + be defined + type: boolean + required: + - key + type: object + x-kubernetes-map-type: atomic + required: + - modelId + type: object modelLocation: description: Specific location where the model data exists type: string @@ -4256,6 +4513,8 @@ spec: enum: - fsx - s3 + - huggingface + - kubernetesVolume type: string prefetchEnabled: default: false @@ -5492,6 +5751,44 @@ spec: - lastUpdated - name type: object + dnsStatus: + description: Status of the operator-managed Route53 DNS record + properties: + dnsHealth: + description: 'DNS resolution status: Active, Pending, or Error.' + enum: + - Active + - Pending + - Error + type: string + hostedZoneId: + description: Route53 hosted zone ID. + type: string + lastTransitionTime: + description: When the status last transitioned, used for propagation + timeout. + format: date-time + type: string + managedByOperator: + description: Whether the operator manages this DNS record. + type: boolean + message: + description: Human-readable status or error message. 
+ type: string + previousHostedZoneId: + description: Previous hosted zone ID, retained during domain/zone + changes until cleanup completes. + type: string + previousRecordName: + description: Previous record name, retained during domain/zone + changes until cleanup completes. + type: string + recordName: + description: Route53 record name. + type: string + required: + - managedByOperator + type: object endpoints: description: EndpointStatus contains the status of SageMaker endpoints properties: @@ -5801,7 +6098,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml index f86ce2d6..363a807a 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_jumpstartmodels.yaml @@ -262,7 +262,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -307,7 +307,7 @@ spec: type: string serverAddress: description: Server address for 
AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -335,6 +335,211 @@ spec: minimum: 0 type: integer type: object + dataCapture: + description: Configuration for data capture across multiple tiers + (SageMaker, LoadBalancer, Model Pod) + properties: + loadBalancer: + description: Configuration for LoadBalancer level data capture + (Tier 2) + properties: + enabled: + description: Enable or disable load balancer access logs + type: boolean + required: + - enabled + type: object + modelPod: + description: Configuration for Model Pod level data capture (Tier + 3) + properties: + bufferConfig: + description: Configuration for buffering and flushing captured + data + properties: + batchSize: + default: 10 + description: Number of records to batch before writing + to S3 + format: int32 + maximum: 1000 + minimum: 1 + type: integer + flushIntervalSeconds: + default: 60 + description: Flush interval in seconds + format: int32 + maximum: 300 + minimum: 10 + type: integer + type: object + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, 
application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable model pod data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + payloadConfig: + description: Configuration for payload size limits + properties: + maxPayloadSizeKB: + default: 0 + description: Maximum payload size in KB to capture. 0 + means no limit (capture full payload). + format: int32 + maximum: 10240 + minimum: 0 + type: integer + type: object + required: + - enabled + type: object + s3Uri: + description: |- + Common S3 URI for all data capture tiers. Each tier will write to a specific prefix within this bucket. + Must use s3:// protocol (required by ALB access logs). + If not provided, the TLS certificate bucket will be used for data capture storage. 
+ maxLength: 512 + pattern: ^s3://([^/]+)(/[^,=]*)?$ + type: string + sagemakerEndpoint: + description: Configuration for SageMaker Endpoint level data capture + (Tier 1) + properties: + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: Capture options (Input, Output, or both). Defaults + to [Input, Output] when enabled. + items: + description: CaptureOption defines what data to capture + (input, output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + enabled: + description: Enable or disable SageMaker endpoint data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100). Defaults + to 100 when enabled. 
+ format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias + ARN for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + required: + - enabled + type: object + type: object + dnsConfig: + description: DNS automation configuration for Route53. Requires tlsConfig.customCertificateConfig + to be set. + properties: + hostedZoneId: + description: Route53 Hosted Zone ID where the DNS record will + be created. + pattern: ^Z[A-Z0-9]+$ + type: string + required: + - hostedZoneId + type: object environmentVariables: description: Additional environment variables to be passed to the inference server. Limited to 100 key-value pairs. @@ -576,7 +781,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -623,7 +828,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -1031,7 +1236,7 @@ spec: Total number of terminating pods targeted by this deployment. Terminating pods have a non-null .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. - This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. 
+ This is a beta field and requires enabling DeploymentReplicaSetTerminatingReplicas feature (enabled by default). format: int32 type: integer unavailableReplicas: @@ -1051,6 +1256,44 @@ spec: - lastUpdated - name type: object + dnsStatus: + description: Status of the operator-managed Route53 DNS record + properties: + dnsHealth: + description: 'DNS resolution status: Active, Pending, or Error.' + enum: + - Active + - Pending + - Error + type: string + hostedZoneId: + description: Route53 hosted zone ID. + type: string + lastTransitionTime: + description: When the status last transitioned, used for propagation + timeout. + format: date-time + type: string + managedByOperator: + description: Whether the operator manages this DNS record. + type: boolean + message: + description: Human-readable status or error message. + type: string + previousHostedZoneId: + description: Previous hosted zone ID, retained during domain/zone + changes until cleanup completes. + type: string + previousRecordName: + description: Previous record name, retained during domain/zone + changes until cleanup completes. + type: string + recordName: + description: Route53 record name. + type: string + required: + - managedByOperator + type: object endpoints: description: EndpointStatus contains the status of SageMaker endpoints properties: @@ -1352,7 +1595,7 @@ spec: type: string serverAddress: description: Server address for AMP workspace - pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+\/[a-zA-Z0-9-]+$|^$ + pattern: ^https:\/\/aps-workspaces\.[a-zA-Z0-9-]+(?:\.[a-zA-Z0-9-]+)*\.amazonaws\.com\/workspaces\/ws-[a-zA-Z0-9-]+$|^$ type: string targetValue: description: Target metric value for scaling @@ -1701,7 +1944,7 @@ spec: Total number of terminating pods targeted by this deployment. Terminating pods have a non-null .metadata.deletionTimestamp and have not yet reached the Failed or Succeeded .status.phase. 
- This is an alpha field. Enable DeploymentReplicaSetTerminatingReplicas to be able to use this field. + This is a beta field and requires enabling DeploymentReplicaSetTerminatingReplicas feature (enabled by default). format: int32 type: integer unavailableReplicas: diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml index 80f1c56a..aaab9e29 100644 --- a/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/config/crd/inference.sagemaker.aws.amazon.com_sagemakerendpointregistrations.yaml @@ -114,6 +114,95 @@ spec: description: REST API Gateway identifier that proxies requests to the HyperPod endpoint (via NLB/ALB) type: string + sageMakerEndpointDataCaptureConfig: + description: Configuration for SageMaker endpoint data capture + properties: + captureContentTypeHeader: + description: Configuration for how to treat different content + type headers during capture + properties: + csvContentTypes: + description: |- + List of content type headers to treat as CSV + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: text/csv, application/csv + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + jsonContentTypes: + description: |- + List of content type headers to treat as JSON + Each item must be 1-256 characters and match pattern: [a-zA-Z0-9](-*[a-zA-Z0-9])*/[a-zA-Z0-9](-*[a-zA-Z0-9.])* + Example: application/json, application/jsonlines + items: + type: string + maxItems: 10 + minItems: 1 + type: array + x-kubernetes-list-type: set + type: object + captureOptions: + description: 
Capture options specifying what to capture (Input, + Output, or both) + items: + description: CaptureOption defines what data to capture (input, + output, or both). + properties: + captureMode: + description: 'Capture mode: Input or Output' + enum: + - Input + - Output + type: string + required: + - captureMode + type: object + maxItems: 32 + minItems: 1 + type: array + destinationS3Uri: + description: S3 URI where captured data will be stored + maxLength: 512 + pattern: ^s3://([^/]+)(/.*)?$ + type: string + enabled: + description: Enable or disable SageMaker endpoint data capture + type: boolean + initialSamplingPercentage: + description: Percentage of requests to capture (0-100) + format: int32 + maximum: 100 + minimum: 0 + type: integer + kmsKeyId: + description: Optional KMS key ID, ARN, alias name, or alias ARN + for encrypting captured data + maxLength: 2048 + pattern: ^[a-zA-Z0-9:/_-]*$ + type: string + required: + - captureOptions + - destinationS3Uri + - enabled + - initialSamplingPercentage + type: object + tags: + description: User-defined tags to propagate to SageMaker resources + (Model, EndpointConfig, Endpoint) + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array tlsConfig: properties: tlsCertificateOutputS3Bucket: @@ -352,6 +441,20 @@ spec: description: REST API Gateway identifier that proxies requests to the HyperPod endpoint (via NLB/ALB) type: string + tags: + description: User-defined tags to propagate to SageMaker resources + (Model, EndpointConfig, Endpoint) + items: + properties: + name: + type: string + value: + type: string + required: + - name + - value + type: object + type: array tlsConfig: properties: tlsCertificateOutputS3Bucket: diff --git a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml index 02793113..d37ec919 100644 --- 
a/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml +++ b/helm_chart/HyperPodHelmChart/charts/inference-operator/values.yaml @@ -27,6 +27,9 @@ image: tag: v3.1 pullPolicy: Always repository: + initContainer: + repository: hyperpod-inference-operator-init-container + tag: v1.0.1 hyperpodClusterArn: executionRoleArn: jumpstartGatedModelDownloadRoleArn: ""