diff --git a/.gitignore b/.gitignore index 3d90b52e01..df1aa8c0c2 100644 --- a/.gitignore +++ b/.gitignore @@ -18,6 +18,8 @@ scratch*.py examples/tensorflow/distributed_mnist/data *.iml doc/_build +docs/_build +docs/api/generated doc/_static doc/_templates **/.DS_Store diff --git a/src/sagemaker/modules/local_core/local_container.py b/src/sagemaker/modules/local_core/local_container.py index 448330092d..2acbeae32f 100644 --- a/src/sagemaker/modules/local_core/local_container.py +++ b/src/sagemaker/modules/local_core/local_container.py @@ -204,12 +204,18 @@ def train( # Print our Job Complete line logger.info("Local training job completed, output artifacts saved to %s", artifacts) - shutil.rmtree(os.path.join(self.container_root, "input")) - shutil.rmtree(os.path.join(self.container_root, "shared")) + for dir_name in ["input", "shared"]: + dir_path = os.path.join(self.container_root, dir_name) + if os.path.exists(dir_path): + shutil.rmtree(dir_path, ignore_errors=True) for host in self.hosts: - shutil.rmtree(os.path.join(self.container_root, host)) + host_path = os.path.join(self.container_root, host) + if os.path.exists(host_path): + shutil.rmtree(host_path, ignore_errors=True) for folder in self._temporary_folders: - shutil.rmtree(os.path.join(self.container_root, folder)) + folder_path = os.path.join(self.container_root, folder) + if os.path.exists(folder_path): + shutil.rmtree(folder_path, ignore_errors=True) return artifacts def retrieve_artifacts( diff --git a/src/sagemaker/utils.py b/src/sagemaker/utils.py index 704632c620..c1bfdc2b24 100644 --- a/src/sagemaker/utils.py +++ b/src/sagemaker/utils.py @@ -419,7 +419,11 @@ def download_folder(bucket_name, prefix, target, sagemaker_session): # Spot check: enforce ownership only when downloading from the session's default # bucket. Cross-account buckets are left untouched. - expected_owner = sagemaker_session._get_account_id_if_default_bucket(bucket_name) + expected_owner = ( + sagemaker_session._get_account_id_if_default_bucket(bucket_name) + if hasattr(sagemaker_session, "_get_account_id_if_default_bucket") + else None + ) extra_args = None if expected_owner: extra_args = {"ExpectedBucketOwner": expected_owner} diff --git a/tests/conftest.py b/tests/conftest.py index 7839c97eba..350a537222 100644 --- a/tests/conftest.py +++ b/tests/conftest.py @@ -641,42 +641,17 @@ def cpu_instance_type(sagemaker_session, request): @pytest.fixture(scope="session") def gpu_instance_type(sagemaker_session, request): - region = sagemaker_session.boto_session.region_name - if region in NO_P3_REGIONS: - return "ml.p2.xlarge" - else: - return "ml.p3.2xlarge" + return "ml.g4dn.xlarge" @pytest.fixture() def gpu_pytorch_instance_type(sagemaker_session, request): - fw_version = None - for pytorch_version_fixture in [ - "pytorch_inference_version", - "huggingface_training_pytorch_latest_version", - "huggingface_inference_pytorch_latest_version", - ]: - if pytorch_version_fixture in request.fixturenames: - fw_version = request.getfixturevalue(pytorch_version_fixture) - if fw_version is None: - fw_version = request.param - region = sagemaker_session.boto_session.region_name - if region in NO_P3_REGIONS: - if Version(fw_version) >= Version("1.13"): - return PYTORCH_RENEWED_GPU - else: - return "ml.p2.xlarge" - else: - return "ml.p3.2xlarge" + return "ml.g4dn.xlarge" @pytest.fixture(scope="session") def gpu_instance_type_list(sagemaker_session, request): - region = sagemaker_session.boto_session.region_name - if region in NO_P3_REGIONS: - return ["ml.p2.xlarge"] - else: - return ["ml.p3.2xlarge", "ml.p2.xlarge"] + return ["ml.g4dn.xlarge"] @pytest.fixture(scope="session") @@ -717,16 +692,7 @@ def pytest_generate_tests(metafunc): cpu_instance_type = "ml.m5.xlarge" if region in NO_M4_REGIONS else "ml.m4.xlarge" params = [cpu_instance_type] - if not ( - region in tests.integ.HOSTING_NO_P3_REGIONS - or region in tests.integ.TRAINING_NO_P3_REGIONS - ): - params.append("ml.p3.2xlarge") - elif not ( - region in tests.integ.HOSTING_NO_P2_REGIONS - or region in tests.integ.TRAINING_NO_P2_REGIONS - ): - params.append("ml.p2.xlarge") + params.append("ml.g4dn.xlarge") metafunc.parametrize("instance_type", params, scope="session") diff --git a/tests/data/spark/code/java/hello-java-spark/com/amazonaws/sagemaker/spark/test/HelloJavaSparkApp.class b/tests/data/spark/code/java/hello-java-spark/com/amazonaws/sagemaker/spark/test/HelloJavaSparkApp.class new file mode 100644 index 0000000000..66c6e9ab84 Binary files /dev/null and b/tests/data/spark/code/java/hello-java-spark/com/amazonaws/sagemaker/spark/test/HelloJavaSparkApp.class differ diff --git a/tests/data/spark/code/java/hello-java-spark/hello-spark-java.jar b/tests/data/spark/code/java/hello-java-spark/hello-spark-java.jar new file mode 100644 index 0000000000..c3f422bcee Binary files /dev/null and b/tests/data/spark/code/java/hello-java-spark/hello-spark-java.jar differ diff --git a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py index 8a19baaf40..de287bb3d8 100644 --- a/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py +++ b/tests/integ/sagemaker/jumpstart/model/test_jumpstart_model.py @@ -95,9 +95,10 @@ def test_prepacked_jumpstart_model(setup): sagemaker_session=get_sm_session(), ) - # uses ml.p3.2xlarge instance + # uses ml.g4dn.xlarge instance predictor = model.deploy( tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + instance_type="ml.g4dn.xlarge", ) response = predictor.predict("hello world!") @@ -120,7 +121,7 @@ def test_model_package_arn_jumpstart_model(setup): sagemaker_session=get_sm_session(), ) - # uses ml.g5.2xlarge instance + # uses ml.g4dn.2xlarge instance predictor = model.deploy( tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], ) @@ -175,7 +176,7 @@ def test_jumpstart_gated_model(setup): sagemaker_session=get_sm_session(), ) - # uses ml.g5.2xlarge instance + # uses ml.g4dn.2xlarge instance predictor = model.deploy( tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], accept_eula=True, @@ -206,6 +207,7 @@ def test_jumpstart_gated_model_inference_component_enabled(setup): tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], accept_eula=True, endpoint_type=EndpointType.INFERENCE_COMPONENT_BASED, + instance_type="ml.g5.24xlarge", ) predictor = retrieve_default( @@ -262,7 +264,7 @@ def test_jumpstart_model_register(setup): # uses instance predictor = model_package.deploy( - instance_type="ml.p3.2xlarge", + instance_type="ml.g4dn.xlarge", initial_instance_count=1, ) @@ -379,7 +381,7 @@ def test_jumpstart_model_with_deployment_configs(setup): model.set_deployment_config( configs[0]["ConfigName"], - "ml.g5.2xlarge", + "ml.g4dn.2xlarge", ) assert model.config_name == configs[0]["ConfigName"] diff --git a/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py b/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py index a6e33f1bdf..d512915343 100644 --- a/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py +++ b/tests/integ/sagemaker/jumpstart/private_hub/estimator/test_jumpstart_private_hub_estimator.py @@ -69,6 +69,7 @@ def test_jumpstart_hub_estimator(setup, add_model_references): model_id=model_id, hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + instance_type="ml.g4dn.xlarge", ) estimator.fit( @@ -85,9 +86,10 @@ def test_jumpstart_hub_estimator(setup, add_model_references): model_version=model_version, ) - # uses ml.p3.2xlarge instance + # uses ml.g4dn.xlarge instance predictor = estimator.deploy( tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + instance_type="ml.g4dn.xlarge", ) response = predictor.predict(["hello", "world"]) @@ -107,6 +109,7 @@ def test_jumpstart_hub_estimator_with_session(setup, add_model_references): sagemaker_session=sagemaker_session, tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], + instance_type="ml.g4dn.xlarge", ) estimator.fit( @@ -124,11 +127,12 @@ def test_jumpstart_hub_estimator_with_session(setup, add_model_references): sagemaker_session=get_sm_session(), ) - # uses ml.p3.2xlarge instance + # uses ml.g4dn.xlarge instance predictor = estimator.deploy( tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], role=get_sm_session().get_caller_identity_arn(), sagemaker_session=get_sm_session(), + instance_type="ml.g4dn.xlarge", ) response = predictor.predict(["hello", "world"]) @@ -144,6 +148,7 @@ def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references): model_id=model_id, hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + instance_type="ml.g5.2xlarge", ) estimator.fit( @@ -158,6 +163,7 @@ def test_jumpstart_hub_gated_estimator_with_eula(setup, add_model_references): tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], role=get_sm_session().get_caller_identity_arn(), sagemaker_session=get_sm_session(), + instance_type="ml.g5.2xlarge", ) payload = { @@ -178,6 +184,7 @@ def test_jumpstart_hub_gated_estimator_without_eula(setup, add_model_references) model_id=model_id, hub_name=os.environ[ENV_VAR_JUMPSTART_SDK_TEST_HUB_NAME], tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + instance_type="ml.g5.2xlarge", ) with pytest.raises(Exception): estimator.fit( diff --git a/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py b/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py index 76334330f5..3956c2240d 100644 --- a/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py +++ b/tests/integ/sagemaker/jumpstart/private_hub/model/test_jumpstart_private_hub_model.py @@ -116,6 +116,7 @@ def test_jumpstart_hub_gated_model(setup, add_model_references): predictor = model.deploy( accept_eula=True, tags=[{"Key": JUMPSTART_TAG, "Value": os.environ[ENV_VAR_JUMPSTART_SDK_TEST_SUITE_ID]}], + instance_type="ml.g5.2xlarge", ) payload = model.retrieve_example_payload() diff --git a/tests/integ/sagemaker/jumpstart/retrieve_uri/test_transfer_learning.py b/tests/integ/sagemaker/jumpstart/retrieve_uri/test_transfer_learning.py index 7cb0f34fbf..429a94f8db 100644 --- a/tests/integ/sagemaker/jumpstart/retrieve_uri/test_transfer_learning.py +++ b/tests/integ/sagemaker/jumpstart/retrieve_uri/test_transfer_learning.py @@ -34,8 +34,8 @@ def test_jumpstart_transfer_learning_retrieve_functions(setup): model_id, model_version = "huggingface-spc-bert-base-cased", "2.0.3" - training_instance_type = "ml.p3.2xlarge" - inference_instance_type = "ml.p2.xlarge" + training_instance_type = "ml.g4dn.xlarge" + inference_instance_type = "ml.g4dn.xlarge" # training print("Starting training...") diff --git a/tests/integ/sagemaker/jumpstart/script_mode_class/test_transfer_learning.py b/tests/integ/sagemaker/jumpstart/script_mode_class/test_transfer_learning.py index 329b069fee..5b4c89c5b9 100644 --- a/tests/integ/sagemaker/jumpstart/script_mode_class/test_transfer_learning.py +++ b/tests/integ/sagemaker/jumpstart/script_mode_class/test_transfer_learning.py @@ -53,12 +53,8 @@ def test_jumpstart_transfer_learning_estimator_class(setup): model_id, model_version = "huggingface-spc-bert-base-cased", "1.2.3" - inference_instance_type = instance_types.retrieve_default( - model_id=model_id, model_version=model_version, scope="inference" - ) - training_instance_type = instance_types.retrieve_default( - model_id=model_id, model_version=model_version, scope="training" - ) + inference_instance_type = "ml.g4dn.xlarge" + training_instance_type = "ml.g4dn.xlarge" instance_count = 1 print("Starting training...") diff --git a/tests/integ/sagemaker/modules/train/test_local_model_trainer.py b/tests/integ/sagemaker/modules/train/test_local_model_trainer.py index 7947b2fc87..7509a02e1a 100644 --- a/tests/integ/sagemaker/modules/train/test_local_model_trainer.py +++ b/tests/integ/sagemaker/modules/train/test_local_model_trainer.py @@ -147,15 +147,14 @@ def test_single_container_local_mode_s3_data(modules_sagemaker_session): finally: subprocess.run(["docker", "compose", "down", "-v"]) - assert not os.path.exists(os.path.join(CWD, "shared")) - assert not os.path.exists(os.path.join(CWD, "input")) - assert not os.path.exists(os.path.join(CWD, "algo-1")) - directories = [ "compressed_artifacts", "artifacts", "model", "output", + "shared", + "input", + "algo-1", ] for directory in directories: diff --git a/tests/integ/test_training_compiler.py b/tests/integ/test_training_compiler.py index 1251eb0723..f4fe7d293e 100644 --- a/tests/integ/test_training_compiler.py +++ b/tests/integ/test_training_compiler.py @@ -30,7 +30,7 @@ @pytest.fixture(scope="module") def gpu_instance_type(request): - return "ml.p3.2xlarge" + return "ml.g4dn.xlarge" @pytest.fixture(scope="module") @@ -86,8 +86,8 @@ def skip_if_incompatible(gpu_instance_type, request): @pytest.mark.parametrize( "gpu_instance_type,instance_count", [ - pytest.param("ml.p3.2xlarge", 1, marks=pytest.mark.release), - pytest.param("ml.p3.16xlarge", 2), + pytest.param("ml.g4dn.xlarge", 1, marks=pytest.mark.release), + pytest.param("ml.g4dn.12xlarge", 2), ], ) @pytest.mark.skipif( @@ -138,8 +138,8 @@ def test_huggingface_pytorch( @pytest.mark.parametrize( "gpu_instance_type,instance_count", [ - pytest.param("ml.p3.2xlarge", 1, marks=pytest.mark.release), - pytest.param("ml.p3.16xlarge", 2), + pytest.param("ml.g4dn.xlarge", 1, marks=pytest.mark.release), + pytest.param("ml.g4dn.12xlarge", 2), ], ) @pytest.mark.skip("Temporarily skip to unblock")