Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ scratch*.py
examples/tensorflow/distributed_mnist/data
*.iml
doc/_build
docs/_build
docs/api/generated
doc/_static
doc/_templates
**/.DS_Store
Expand Down
14 changes: 10 additions & 4 deletions src/sagemaker/modules/local_core/local_container.py
Original file line number Diff line number Diff line change
Expand Up @@ -204,12 +204,18 @@ def train(
# Print our Job Complete line
logger.info("Local training job completed, output artifacts saved to %s", artifacts)

shutil.rmtree(os.path.join(self.container_root, "input"))
shutil.rmtree(os.path.join(self.container_root, "shared"))
for dir_name in ["input", "shared"]:
dir_path = os.path.join(self.container_root, dir_name)
if os.path.exists(dir_path):
shutil.rmtree(dir_path, ignore_errors=True)
for host in self.hosts:
shutil.rmtree(os.path.join(self.container_root, host))
host_path = os.path.join(self.container_root, host)
if os.path.exists(host_path):
shutil.rmtree(host_path, ignore_errors=True)
for folder in self._temporary_folders:
shutil.rmtree(os.path.join(self.container_root, folder))
folder_path = os.path.join(self.container_root, folder)
if os.path.exists(folder_path):
shutil.rmtree(folder_path, ignore_errors=True)
return artifacts

def retrieve_artifacts(
Expand Down
6 changes: 5 additions & 1 deletion src/sagemaker/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -419,7 +419,11 @@ def download_folder(bucket_name, prefix, target, sagemaker_session):

# Spot check: enforce ownership only when downloading from the session's default
# bucket. Cross-account buckets are left untouched.
expected_owner = sagemaker_session._get_account_id_if_default_bucket(bucket_name)
expected_owner = (
sagemaker_session._get_account_id_if_default_bucket(bucket_name)
if hasattr(sagemaker_session, "_get_account_id_if_default_bucket")
else None
)
extra_args = None
if expected_owner:
extra_args = {"ExpectedBucketOwner": expected_owner}
Expand Down
42 changes: 4 additions & 38 deletions tests/conftest.py
Original file line number Diff line number Diff line change
Expand Up @@ -641,42 +641,17 @@ def cpu_instance_type(sagemaker_session, request):

@pytest.fixture(scope="session")
def gpu_instance_type(sagemaker_session, request):
region = sagemaker_session.boto_session.region_name
if region in NO_P3_REGIONS:
return "ml.p2.xlarge"
else:
return "ml.p3.2xlarge"
return "ml.g4dn.xlarge"


@pytest.fixture()
def gpu_pytorch_instance_type(sagemaker_session, request):
fw_version = None
for pytorch_version_fixture in [
"pytorch_inference_version",
"huggingface_training_pytorch_latest_version",
"huggingface_inference_pytorch_latest_version",
]:
if pytorch_version_fixture in request.fixturenames:
fw_version = request.getfixturevalue(pytorch_version_fixture)
if fw_version is None:
fw_version = request.param
region = sagemaker_session.boto_session.region_name
if region in NO_P3_REGIONS:
if Version(fw_version) >= Version("1.13"):
return PYTORCH_RENEWED_GPU
else:
return "ml.p2.xlarge"
else:
return "ml.p3.2xlarge"
return "ml.g4dn.xlarge"


@pytest.fixture(scope="session")
def gpu_instance_type_list(sagemaker_session, request):
region = sagemaker_session.boto_session.region_name
if region in NO_P3_REGIONS:
return ["ml.p2.xlarge"]
else:
return ["ml.p3.2xlarge", "ml.p2.xlarge"]
return ["ml.g4dn.xlarge"]


@pytest.fixture(scope="session")
Expand Down Expand Up @@ -717,16 +692,7 @@ def pytest_generate_tests(metafunc):
cpu_instance_type = "ml.m5.xlarge" if region in NO_M4_REGIONS else "ml.m4.xlarge"

params = [cpu_instance_type]
if not (
region in tests.integ.HOSTING_NO_P3_REGIONS
or region in tests.integ.TRAINING_NO_P3_REGIONS
):
params.append("ml.p3.2xlarge")
elif not (
region in tests.integ.HOSTING_NO_P2_REGIONS
or region in tests.integ.TRAINING_NO_P2_REGIONS
):
params.append("ml.p2.xlarge")
params.append("ml.g4dn.xlarge")

metafunc.parametrize("instance_type", params, scope="session")

Expand Down
Binary file not shown.
Binary file not shown.
12 changes: 7 additions & 5 deletions tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,10 @@ def test_prepacked_jumpstart_model(setup):
sagemaker_session=get_sm_session(),
)

# uses ml.p3.2xlarge instance
# uses ml.g4dn.xlarge instance
predictor = model.deploy(
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
instance_type="ml.g4dn.xlarge",
)

response = predictor.predict("hello world!")
Expand All @@ -120,7 +121,7 @@ def test_model_package_arn_jumpstart_model(setup):
sagemaker_session=get_sm_session(),
)

# uses ml.g5.2xlarge instance
# uses ml.g4dn.2xlarge instance
predictor = model.deploy(
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
)
Expand Down Expand Up @@ -175,7 +176,7 @@ def test_jumpstart_gated_model(setup):
sagemaker_session=get_sm_session(),
)

# uses ml.g5.2xlarge instance
# uses ml.g4dn.2xlarge instance
predictor = model.deploy(
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
accept_eula=True,
Expand Down Expand Up @@ -206,6 +207,7 @@ def test_jumpstart_gated_model_inference_component_enabled(setup):
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
accept_eula=True,
endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED,
instance_type="ml.g5.24xlarge",
)

predictor = retrieve_default(
Expand Down Expand Up @@ -262,7 +264,7 @@ def test_jumpstart_model_register(setup):

# uses instance
predictor = model_package.deploy(
instance_type="ml.p3.2xlarge",
instance_type="ml.g4dn.xlarge",
initial_instance_count=1,
)

Expand Down Expand Up @@ -379,7 +381,7 @@ def test_jumpstart_model_with_deployment_configs(setup):

model.set_deployment_config(
configs[0]["ConfigName"],
"ml.g5.2xlarge",
"ml.g4dn.2xlarge",
)
assert model.config_name == configs[0]["ConfigName"]

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_jumpstart_hub_estimator(setup, add_model_references):
model_id=model_id,
hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME],
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
instance_type="ml.g4dn.xlarge",
)

estimator.fit(
Expand All @@ -85,9 +86,10 @@ def test_jumpstart_hub_estimator(setup, add_model_references):
model_version=model_version,
)

# uses ml.p3.2xlarge instance
# uses ml.g4dn.xlarge instance
predictor = estimator.deploy(
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
instance_type="ml.g4dn.xlarge",
)

response = predictor.predict(["hello", "world"])
Expand All @@ -107,6 +109,7 @@ def test_jumpstart_hub_estimator_with_session(setup, add_model_references):
sagemaker_session=sagemaker_session,
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME],
instance_type="ml.g4dn.xlarge",
)

estimator.fit(
Expand All @@ -124,11 +127,12 @@ def test_jumpstart_hub_estimator_with_session(setup, add_model_references):
sagemaker_session=get_sm_session(),
)

# uses ml.p3.2xlarge instance
# uses ml.g4dn.xlarge instance
predictor = estimator.deploy(
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
role=get_sm_session().get_caller_identity_arn(),
sagemaker_session=get_sm_session(),
instance_type="ml.g4dn.xlarge",
)

response = predictor.predict(["hello", "world"])
Expand All @@ -144,6 +148,7 @@ def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references):
model_id=model_id,
hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME],
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
instance_type="ml.g5.2xlarge",
)

estimator.fit(
Expand All @@ -158,6 +163,7 @@ def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references):
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
role=get_sm_session().get_caller_identity_arn(),
sagemaker_session=get_sm_session(),
instance_type="ml.g5.2xlarge",
)

payload = {
Expand All @@ -178,6 +184,7 @@ def test_jumpstart_hub_gated_estimator_without_eula(setup, add_model_references)
model_id=model_id,
hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME],
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
instance_type="ml.g5.2xlarge",
)
with pytest.raises(Exception):
estimator.fit(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -116,6 +116,7 @@ def test_jumpstart_hub_gated_model(setup, add_model_references):
predictor = model.deploy(
accept_eula=True,
tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}],
instance_type="ml.g5.2xlarge",
)

payload = model.retrieve_example_payload()
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
def test_jumpstart_transfer_learning_retrieve_functions(setup):

model_id, model_version = "huggingface-spc-bert-base-cased", "2.0.3"
training_instance_type = "ml.p3.2xlarge"
inference_instance_type = "ml.p2.xlarge"
training_instance_type = "ml.g4dn.xlarge"
inference_instance_type = "ml.g4dn.xlarge"

# training
print("Starting training...")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -53,12 +53,8 @@ def test_jumpstart_transfer_learning_estimator_class(setup):

model_id, model_version = "huggingface-spc-bert-base-cased", "1.2.3"

inference_instance_type = instance_types.retrieve_default(
model_id=model_id, model_version=model_version, scope="inference"
)
training_instance_type = instance_types.retrieve_default(
model_id=model_id, model_version=model_version, scope="training"
)
inference_instance_type = "ml.g4dn.xlarge"
training_instance_type = "ml.g4dn.xlarge"
instance_count = 1

print("Starting training...")
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -147,15 +147,14 @@ def test_single_container_local_mode_s3_data(modules_sagemaker_session):
finally:
subprocess.run(["docker", "compose", "down", "-v"])

assert not os.path.exists(os.path.join(CWD, "shared"))
assert not os.path.exists(os.path.join(CWD, "input"))
assert not os.path.exists(os.path.join(CWD, "algo-1"))

directories = [
"compressed_artifacts",
"artifacts",
"model",
"output",
"shared",
"input",
"algo-1",
]

for directory in directories:
Expand Down
10 changes: 5 additions & 5 deletions tests/integ/test_training_compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@

@pytest.fixture(scope="module")
def gpu_instance_type(request):
return "ml.p3.2xlarge"
return "ml.g4dn.xlarge"


@pytest.fixture(scope="module")
Expand Down Expand Up @@ -86,8 +86,8 @@ def skip_if_incompatible(gpu_instance_type, request):
@pytest.mark.parametrize(
"gpu_instance_type,instance_count",
[
pytest.param("ml.p3.2xlarge", 1, marks=pytest.mark.release),
pytest.param("ml.p3.16xlarge", 2),
pytest.param("ml.g4dn.xlarge", 1, marks=pytest.mark.release),
pytest.param("ml.g4dn.12xlarge", 2),
],
)
@pytest.mark.skipif(
Expand Down Expand Up @@ -138,8 +138,8 @@ def test_huggingface_pytorch(
@pytest.mark.parametrize(
"gpu_instance_type,instance_count",
[
pytest.param("ml.p3.2xlarge", 1, marks=pytest.mark.release),
pytest.param("ml.p3.16xlarge", 2),
pytest.param("ml.g4dn.xlarge", 1, marks=pytest.mark.release),
pytest.param("ml.g4dn.12xlarge", 2),
],
)
@pytest.mark.skip("Temporarily skip to unblock")
Expand Down
Loading