diff --git a/acceptance/bundle/deploy/mlops-stacks/out.test.toml b/acceptance/bundle/deploy/mlops-stacks/out.test.toml index 650836edeb3..bbc7fcfd1bd 100644 --- a/acceptance/bundle/deploy/mlops-stacks/out.test.toml +++ b/acceptance/bundle/deploy/mlops-stacks/out.test.toml @@ -1,3 +1,3 @@ -Local = false +Local = true Cloud = true EnvMatrix.DATABRICKS_BUNDLE_ENGINE = ["terraform", "direct"] diff --git a/acceptance/bundle/deploy/mlops-stacks/script b/acceptance/bundle/deploy/mlops-stacks/script index dec2a4ba473..a1c5a45f42a 100644 --- a/acceptance/bundle/deploy/mlops-stacks/script +++ b/acceptance/bundle/deploy/mlops-stacks/script @@ -20,7 +20,11 @@ cleanup() { } trap cleanup EXIT -trace $CLI bundle init mlops-stacks --config-file config.json +if [ -z "$CLOUD_ENV" ]; then + trace $CLI bundle init "$TESTDIR/template" --config-file config.json +else + trace $CLI bundle init mlops-stacks --config-file config.json +fi trace cat test_repo_mlops_stacks/README.md | head -n 4 cd "test_repo_mlops_stacks/project_name_${UNIQUE_NAME}" || exit 1 diff --git a/acceptance/bundle/deploy/mlops-stacks/template.REVISION b/acceptance/bundle/deploy/mlops-stacks/template.REVISION new file mode 100644 index 00000000000..78c0d6c75d1 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template.REVISION @@ -0,0 +1 @@ +1c87ae24d6c06050adc51185a5022ea58a8961f2 diff --git a/acceptance/bundle/deploy/mlops-stacks/template/databricks_template_schema.json b/acceptance/bundle/deploy/mlops-stacks/template/databricks_template_schema.json new file mode 100644 index 00000000000..69fc4797666 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/databricks_template_schema.json @@ -0,0 +1,360 @@ +{ + "welcome_message": "Welcome to MLOps Stacks. 
For detailed information on project generation, see the README at https://github.com/databricks/mlops-stacks/blob/main/README.md.", + "min_databricks_cli_version": "v0.236.0", + "properties": { + "input_setup_cicd_and_project": { + "order": 1, + "type": "string", + "description": "{{if false}}\n\nERROR: This template is not supported by your current Databricks CLI version.\nPlease hit control-C and go to https://docs.databricks.com/en/dev-tools/cli/install.html for instructions on upgrading the CLI to the minimum version supported by MLOps Stacks.\n\n\n{{end}}\nSelect if both CI/CD and the Project should be set up, or only one of them.\nYou can always set up the other later by running initialization again", + "default": "CICD_and_Project", + "enum": ["CICD_and_Project", "Project_Only", "CICD_Only"] + }, + "input_project_name": { + "order": 2, + "type": "string", + "default": "my_mlops_project", + "description": "\nProject Name. Default", + "pattern": "^[^ .\\\\/]{3,}$", + "pattern_match_failure_message": "Project name must be at least 3 characters long and cannot contain the following characters: \"\\\", \"/\", \" \" and \".\".", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + }, + "input_root_dir": { + "order": 3, + "type": "string", + "default": "{{ .input_project_name }}", + "description": "\nRoot directory name.\nFor monorepos, name of the root directory that contains all the projects.\nDefault", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_and_Project" + } + } + } + }, + "input_cloud": { + "order": 4, + "type": "string", + "description": "\nSelect cloud", + "default": "azure", + "enum": ["azure", "aws", "gcp"] + }, + "input_cicd_platform": { + "order": 5, + "type": "string", + "description": "\nSelect CICD platform", + "default": "github_actions", + "enum": ["github_actions", "github_actions_for_github_enterprise_servers", "azure_devops", "gitlab"], 
+ "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + }, + "input_databricks_staging_workspace_host": { + "order": 6, + "type": "string", + "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-staging-workspace.cloud.databricks.com{{else if eq .input_cloud `gcp`}}https://your-staging-workspace.gcp.databricks.com{{end}}", + "description": "\nURL of staging Databricks workspace,\nIt will run PR CI and preview changes before they're deployed to production.\nDefault", + "pattern": "^(https.*)?$", + "pattern_match_failure_message": "Databricks staging workspace host URLs must start with https. Got invalid workspace host.", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + }, + "input_databricks_prod_workspace_host": { + "order": 7, + "type": "string", + "default": "{{if eq .input_cloud `azure`}}https://adb-xxxx.xx.azuredatabricks.net{{else if eq .input_cloud `aws`}}https://your-prod-workspace.cloud.databricks.com{{else if eq .input_cloud `gcp`}}https://your-prod-workspace.gcp.databricks.com{{end}}", + "description": "\nURL of production Databricks workspace.\nDefault", + "pattern": "^(https.*)?$", + "pattern_match_failure_message": "Databricks production workspace host URLs must start with https. 
Got invalid workspace host.", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + }, + "input_default_branch": { + "order": 8, + "type": "string", + "default": "main", + "description": "\nName of the default branch,\nStaging resources are deployed from this branch and stages the latest ML code.\nDefault", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + }, + "input_release_branch": { + "order": 9, + "type": "string", + "default": "release", + "description": "\nName of the release branch.\nThe training and other production jobs pull ML code from this branch.\nDefault", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + }, + "input_read_user_group": { + "order": 10, + "type": "string", + "default": "users", + "description": "\nUser group name to give READ permissions to for project resources\n(ML jobs, integration test job runs, and machine learning resources).\nA group with this name must exist in both the staging and prod workspaces.\nDefault", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + }, + "input_include_models_in_unity_catalog": { + "order": 11, + "type": "string", + "description": "\nWhether to use the Model Registry with Unity Catalog", + "default": "no", + "enum": ["yes", "no"] + }, + "input_staging_catalog_name": { + "order": 12, + "type": "string", + "description": "\nName of the catalog in Unity Catalog that will host the staging UC resources. 
\nThis catalog must already exist and service principals must have access to it.\nDefault", + "default": "staging", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + }, + "input_prod_catalog_name": { + "order": 13, + "type": "string", + "description": "\nName of the catalog in Unity Catalog that will host the production UC resources.\nThis catalog must already exist and service principals must have access to it.\nDefault", + "default": "prod", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + }, + "input_test_catalog_name": { + "order": 14, + "type": "string", + "description": "\nName of the catalog in Unity Catalog that will be used for integration tests.\nThis catalog must already exist and service principals must have access to it.\nDefault", + "default": "test", + "skip_prompt_if": { + "anyOf": [ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + }, + "input_schema_name": { + "order": 15, + "type": "string", + "description": "\nName of schema to use when registering a model in Unity Catalog.\nThis schema must already exist and service principals must have access.\nWe recommend using the project name.\nDefault", + "default": "{{if (eq .input_include_models_in_unity_catalog `no`)}}schema{{else}}{{ .input_project_name }}{{end}}", + "pattern": "^[^ .\\-\\/]*$", + "pattern_match_failure_message": "Valid schema names cannot contain any of the following characters: \" \", \".\", \"-\", \"\\\", \"/\"", + "skip_prompt_if": { + "anyOf":[ + { + 
"properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + ] + } + }, + "input_unity_catalog_read_user_group": { + "order": 16, + "type": "string", + "default": "account users", + "description": "\nUser group name to give EXECUTE privileges to models in Unity Catalog (UC).\nIt must exist in UC with access granted to the staging and prod workspaces.\nDefault", + "skip_prompt_if": { + "anyOf":[ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "no" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + ] + } + }, + "input_inference_table_name": { + "order": 17, + "type": "string", + "description": "\nFully qualified name of inference table to attach monitoring to.\nThis table must already exist and service principals must have access.", + "default": "dev.{{ .input_project_name }}.predictions", + "pattern": "^[^ .\\-\\/]+(\\.[^ .\\-\\/]+){2}$", + "pattern_match_failure_message": "Fully qualified Unity Catalog table names must have catalog, schema, and table separated by \".\" and each cannot contain any of the following characters: \" \", \".\", \"-\", \"\\\", \"/\"", + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + }, + "input_include_feature_store": { + "order": 18, + "type": "string", + "description": "\nWhether to include Feature Store", + "default": "no", + "enum": ["no", "yes"], + "skip_prompt_if": { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + }, + "input_include_mlflow_recipes": { + "order": 19, + "type": "string", + "description": "\nWhether to include MLflow Recipes", + "default": "no", + "enum": ["no", "yes"], + "skip_prompt_if": { + "anyOf":[ + { + "properties": { + "input_include_models_in_unity_catalog": { + "const": "yes" + } + } + }, + { + 
"properties": { + "input_include_feature_store": { + "const": "yes" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "CICD_Only" + } + } + } + ] + } + }, + "input_docker_image": { + "order": 20, + "type": "string", + "description": "\nDocker image for the execution of Gitlab pipelines", + "default": "databricksfieldeng/mlopsstacks:latest", + "skip_prompt_if": { + "anyOf":[ + { + "properties": { + "input_cicd_platform": { + "const": "github_actions" + } + } + }, + { + "properties": { + "input_cicd_platform": { + "const": "github_actions_for_github_enterprise_servers" + } + } + }, + { + "properties": { + "input_cicd_platform": { + "const": "azure_devops" + } + } + }, + { + "properties": { + "input_setup_cicd_and_project": { + "const": "Project_Only" + } + } + } + ] + } + } + }, + "success_message" : "\n*** Your MLOps Stack has been created in the '{{.input_root_dir}}{{if not (eq .input_setup_cicd_and_project `CICD_Only`) }}/{{.input_project_name}}{{end}}' directory! ***\n\nPlease refer to the README.md for further instructions on getting started." +} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/library/functions.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/library/functions.tmpl new file mode 100644 index 00000000000..9d66c9e9ff4 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/library/functions.tmpl @@ -0,0 +1,26 @@ +# define functions +{{ define "get_host" -}} + {{- with url . 
-}} + {{- print .Scheme `://` .Host -}} + {{- end -}} +{{- end }} + +{{ define "generate_doc_link" -}} + {{- if eq .cloud `aws` -}} + {{- if eq .path `repos/git-operations-with-repos#add-a-repo-and-connect-remotely-later` -}} + https://docs.databricks.com/repos/git-operations-with-repos.html#add-a-repo-connected-to-a-remote-repo + {{- else -}} + https://docs.databricks.com/{{ print .path }} + {{- end -}} + {{- else if eq .cloud `gcp` -}} + {{- if eq .path `repos/git-operations-with-repos#add-a-repo-and-connect-remotely-later` -}} + https://docs.gcp.databricks.com/en/repos/git-operations-with-repos.html#clone-a-repo-connected-to-a-remote-repo + {{- else -}} + https://docs.gcp.databricks.com/{{ print .path }} + {{- end -}} + {{- else if eq .cloud `azure` -}} + https://learn.microsoft.com/azure/databricks/{{ (regexp `\.html`).ReplaceAllString (print .path) `` }} + {{- else -}} + {{ fail `Invalid selection of cloud in function generate_doc_link. Please choose from [azure, aws, gcp]` }} + {{- end -}} +{{- end }} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/library/input_validation.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/library/input_validation.tmpl new file mode 100644 index 00000000000..2f54565f956 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/library/input_validation.tmpl @@ -0,0 +1,2 @@ +{{ define `validation` }} +{{- end -}} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/library/template_variables.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/library/template_variables.tmpl new file mode 100644 index 00000000000..805f267d674 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/library/template_variables.tmpl @@ -0,0 +1,64 @@ +# define template variables +{{ define `databricks_prod_workspace_host` -}} + {{- if not (eq (print .input_databricks_prod_workspace_host) "") -}} + {{- with url (print .input_databricks_prod_workspace_host) -}} + {{ print .Scheme `://` .Host }} + {{- end -}} 
+ {{- else -}} + {{- if eq .input_cloud `azure` -}} + https://adb-xxxx.xx.azuredatabricks.net + {{- else if eq .input_cloud `aws` -}} + https://your-prod-workspace.cloud.databricks.com + {{- else if eq .input_cloud `gcp` -}} + https://your-prod-workspace.gcp.databricks.com + {{- end -}} + {{- end -}} +{{- end }} + +{{ define `databricks_staging_workspace_host` -}} + {{- if not (eq (print .input_databricks_staging_workspace_host) "") -}} + {{- with url (print .input_databricks_staging_workspace_host) -}} + {{- print .Scheme `://` .Host -}} + {{- end -}} + {{- else -}} + {{- if eq .input_cloud `azure` -}} + https://adb-xxxx.xx.azuredatabricks.net + {{- else if eq .input_cloud `aws` -}} + https://your-staging-workspace.cloud.databricks.com + {{- else if eq .input_cloud `gcp` -}} + https://your-staging-workspace.gcp.databricks.com + {{- end -}} + {{- end -}} +{{- end }} + +{{ define `cloud_specific_node_type_id` -}} + {{- if (eq .input_cloud `aws`) -}} + i3.xlarge + {{- else if (eq .input_cloud `azure`) -}} + Standard_D3_v2 + {{- else if (eq .input_cloud `gcp`) -}} + n2-highmem-4 + {{- else -}} + {{ fail `Unknown cloud platform` }} + {{- end -}} +{{- end -}} + +{{ define `model_name` -}} + {{ .input_project_name }}-model +{{- end }} + +{{ define `experiment_base_name` -}} + {{ .input_project_name }}-experiment +{{- end }} + +{{ define `project_name_alphanumeric_underscore` -}} + {{- (regexp `-`).ReplaceAllString ((regexp `[^A-Za-z0-9_-]`).ReplaceAllString (print .input_project_name) ``) `_` -}} +{{- end }} + +{{ define `cli_version` -}} + v0.236.0 +{{- end }} + +{{ define `stacks_version` -}} + 0.4 +{{- end }} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/run_validations.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/run_validations.tmpl new file mode 100644 index 00000000000..45a82b8eb1b --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/run_validations.tmpl @@ -0,0 +1 @@ +run validation{{ template 
`validation` . }} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/update_layout.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/update_layout.tmpl new file mode 100644 index 00000000000..a666e23c9b1 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/update_layout.tmpl @@ -0,0 +1,91 @@ +# Remove unrelated CICD platform files +{{ $project_name_alphanumeric_underscore := (regexp `-`).ReplaceAllString ((regexp `[^A-Za-z0-9_-]`).ReplaceAllString .input_project_name ``) `_` -}} +{{ $root_dir := .input_root_dir}} + +{{ if (eq .input_setup_cicd_and_project `Project_Only`) }} + {{ skip (printf `%s/%s` $root_dir `.azure`) }} + {{ skip (printf `%s/%s` $root_dir `.github`) }} + {{ skip (printf `%s/%s` $root_dir `docs`) }} + {{ skip (printf `%s/%s` $root_dir `.gitignore`) }} + {{ skip (printf `%s/%s` $root_dir `README.md`) }} + {{ skip (printf `%s/%s` $root_dir `test-requirements.txt`) }} + {{ skip (printf `%s/%s` $root_dir `cicd.tar.gz`) }} + {{ skip (printf `%s/%s` $root_dir `cicd_params.json`) }} +{{ else if (eq .input_setup_cicd_and_project `CICD_Only`) }} + {{ skip (printf `%s/%s` $root_dir $project_name_alphanumeric_underscore) }} + {{ skip (printf `%s/%s/%s-*` $root_dir `.github/workflows` .input_project_name) }} + {{ skip (printf `%s/%s/%s-*` $root_dir `.azure/devops-pipelines` .input_project_name) }} +{{ end }} + +{{ if (or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`)) }} + {{ skip (printf `%s/%s` $root_dir `.azure`) }} + {{ skip (printf `%s/%s` $root_dir `.gitlab`) }} +{{ else if (eq .input_cicd_platform `azure_devops`) }} + {{ skip (printf `%s/%s` $root_dir `.github`) }} + {{ skip (printf `%s/%s` $root_dir `.gitlab`) }} +{{ else if (eq .input_cicd_platform `gitlab`) }} + {{ skip (printf `%s/%s` $root_dir `.github`) }} + {{ skip (printf `%s/%s` $root_dir `.azure`) }} +{{ end }} + +{{ if (not (eq .input_project_name 
`27896cf3-bb3e-476e-8129-96df0406d5c7`)) }} + {{ skip (printf `%s/%s` $root_dir `_params_testing_only.txt`) }} +{{ end }} + +# Remove Delta and Feature Store code in cases of MLflow Recipes. +{{ if (eq .input_include_mlflow_recipes `yes`) }} + # delta_paths + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/Train.py`) }} + # feature_store_paths + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `feature_engineering`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/feature_engineering`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithFeatureStore.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `resources/feature-engineering-workflow-resource.yml`) }} +# Remove Delta and MLflow Recipes code in cases of Feature Store. +{{ else if (eq .input_include_feature_store `yes`) }} + # delta_paths + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/Train.py`) }} + # recipe_paths + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/profiles`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/steps`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/data`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/__init__.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithMLflowRecipes.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/recipe.yaml`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/README.md`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/ingest_test.py`) }} + {{ skip (printf 
`%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/split_test.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/train_test.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/test_sample.parquet`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/transform_test.py`) }} +# Remove MLflow Recipes and Feature Store code in cases of Delta Table. +{{ else }} + # recipe_paths + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/profiles`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/steps`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/data`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/__init__.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithMLflowRecipes.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/recipe.yaml`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/README.md`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/ingest_test.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/split_test.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/train_test.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/test_sample.parquet`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/training/transform_test.py`) }} + # feature_store_paths + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `feature_engineering`) }} + {{ skip (printf 
`%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `tests/feature_engineering`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `training/notebooks/TrainWithFeatureStore.py`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `resources/feature-engineering-workflow-resource.yml`) }} +{{ end }} + +# Remove utils if using Models in Unity Catalog +{{ if (eq .input_include_models_in_unity_catalog `yes`) }} + {{ skip (printf `%s/%s/%s` $root_dir $project_name_alphanumeric_underscore `utils.py`) }} +{{ end }} + +# Remove template files +{{ skip (printf `%s/%s` $root_dir `cicd`) }} +{{ skip `update_layout` }} +{{ skip `run_validations` }} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/README.md.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/README.md.tmpl new file mode 100644 index 00000000000..9894e69faf9 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/README.md.tmpl @@ -0,0 +1,198 @@ +# {{ .input_root_dir }} + +This directory contains an ML project based on the default +[Databricks MLOps Stacks](https://github.com/databricks/mlops-stacks), +defining a production-grade ML pipeline for automated retraining and batch inference of an ML model on tabular data. +The "Getting Started" docs can be found at {{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/bundles/mlops-stacks.html")) }}. + +See the full pipeline structure below. The [MLOps Stacks README](https://github.com/databricks/mlops-stacks/blob/main/Pipeline.md) +contains additional details on how ML pipelines are tested and deployed across each of the dev, staging, prod environments below. 
+ +![MLOps Stacks diagram](docs/images/mlops-stack-summary.png) + + +## Code structure +This project contains the following components: + +| Component | Description | +|----------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------| +{{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} +| ML Code | Example ML project code, with unit tested Python modules and notebooks | +| ML Resources as Code | ML pipeline resources (training and batch inference jobs with schedules, etc) configured and deployed through [databricks CLI bundles]({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/cli/bundle-cli.html")) }}) | +{{- end }} +| CI/CD | {{ if (eq .input_cicd_platform `github_actions`) }}[GitHub Actions](https://github.com/actions) workflows to test and deploy ML code and resources + {{ else if (eq .input_cicd_platform `azure_devops`) }}[Azure DevOps Pipelines](https://learn.microsoft.com/en-us/azure/devops/pipelines) to test and deploy ML code and resources + {{ else if (eq .input_cicd_platform `gitlab`) }}[Gitlab Pipelines](https://docs.gitlab.com/ee/ci/pipelines/) to test and deploy ML code and resources + {{ end }} | + +contained in the following files: + +``` +{{ .input_root_dir }} <- Root directory. Both monorepo and polyrepo are supported. +│ +{{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} +├── {{template `project_name_alphanumeric_underscore` .}} <- Contains python code, notebooks and ML resources related to one ML project. +│ │ +│ ├── requirements.txt <- Specifies Python dependencies for ML code (for example: model training, batch inference). 
+│ │ +│ ├── databricks.yml <- databricks.yml is the root bundle file for the ML project that can be loaded by databricks CLI bundles. It defines the bundle name, workspace URL and resource config component to be included. +│ │ +{{- if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) }} +│ ├── training <- Training folder contains Notebook that trains and registers the model. +│ │ +│ ├── validation <- Optional model validation step before deploying a model. +│ │ +│ ├── monitoring <- Model monitoring, feature monitoring, etc. +│ │ +│ ├── deployment <- Deployment and Batch inference workflows +│ │ │ +│ │ ├── batch_inference <- Batch inference code that will run as part of scheduled workflow. +│ │ │ +│ │ ├── model_deployment <- As part of CD workflow, deploy the registered model by assigning it the appropriate alias. +│ │ +│ │ +│ ├── tests <- Unit tests for the ML project, including the modules under `features`. +│ │ +│ ├── resources <- ML resource (ML jobs, MLflow models) config definitions expressed as code, across dev/staging/prod/test. +│ │ +│ ├── model-workflow-resource.yml <- ML resource config definition for model training, validation, deployment workflow +│ │ +│ ├── batch-inference-workflow-resource.yml <- ML resource config definition for batch inference workflow +│ │ +│ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment +│ │ +│ ├── monitoring-resource.yml <- ML resource config definition for quality monitoring workflow +{{- else if (eq .input_include_feature_store `yes`) }} +│ ├── training <- Training folder contains Notebook that trains and registers the model with feature store support. +│ │ +│ ├── feature_engineering <- Feature computation code (Python modules) that implements the feature transforms. +│ │ The output of these transforms get persisted as Feature Store tables. Most development +│ │ work happens here. 
+│ │ +│ ├── validation <- Optional model validation step before deploying a model. +│ │ +│ ├── monitoring <- Model monitoring, feature monitoring, etc. +│ │ +│ ├── deployment <- Deployment and Batch inference workflows +│ │ │ +│ │ ├── batch_inference <- Batch inference code that will run as part of scheduled workflow. +│ │ │ +│ │ ├── model_deployment <- As part of CD workflow, deploy the registered model by assigning it the appropriate alias. +│ │ +│ │ +│ ├── tests <- Unit tests for the ML project, including the modules under `features`. +│ │ +│ ├── resources <- ML resource (ML jobs, MLflow models) config definitions expressed as code, across dev/staging/prod/test. +│ │ +│ ├── model-workflow-resource.yml <- ML resource config definition for model training, validation, deployment workflow +│ │ +│ ├── batch-inference-workflow-resource.yml <- ML resource config definition for batch inference workflow +│ │ +│ ├── feature-engineering-workflow-resource.yml <- ML resource config definition for feature engineering workflow +│ │ +│ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment +│ │ +│ ├── monitoring-resource.yml <- ML resource config definition for quality monitoring workflow +{{- else }} +│ ├── training <- Folder for model development via MLflow recipes. +│ │ │ +│ │ ├── steps <- MLflow recipe steps (Python modules) implementing ML pipeline logic, e.g. model training and evaluation. Most +│ │ │ development work happens here. See https://mlflow.org/docs/latest/recipes.html for details +│ │ │ +│ │ ├── notebooks <- Databricks notebook that runs the MLflow recipe, i.e. run the logic in `steps`. Used to +│ │ │ drive code execution on Databricks for CI/CD. In most cases, you do not need to modify +│ │ │ the notebook. 
+│ │ │ +│ │ ├── recipe.yaml <- The main recipe configuration file that declaratively defines the attributes and behavior +│ │ │ of each recipe step, such as the input dataset to use for training a model or the +│ │ │ performance criteria for promoting a model to production. +│ │ │ +│ │ ├── profiles <- Environment-specific (e.g. dev vs test vs prod) configurations for MLflow recipes execution. +│ │ +│ │ +│ ├── validation <- Optional model validation step before deploying a model. +│ │ +│ ├── monitoring <- Model monitoring, feature monitoring, etc. +│ │ +│ ├── deployment <- Model deployment and endpoint deployment. +│ │ │ +│ │ ├── batch_inference <- Batch inference code that will run as part of scheduled workflow. +│ │ │ +│ │ ├── model_deployment <- As part of CD workflow, promote model to Production stage in model registry. +│ │ +│ ├── tests <- Unit tests for the ML project, including modules under `steps`. +│ │ +│ ├── resources <- ML resource (ML jobs, MLflow models) config definitions expressed as code, across dev/staging/prod/test. +│ │ +│ ├── model-workflow-resource.yml <- ML resource config definition for model training, validation, deployment workflow +│ │ +│ ├── batch-inference-workflow-resource.yml <- ML resource config definition for batch inference workflow +│ │ +│ ├── ml-artifacts-resource.yml <- ML resource config definition for model and experiment +│ │ +│ ├── monitoring-resource.yml <- ML resource config definition for quality monitoring workflow +{{- end }} +{{- end }} +│ +{{- if or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`) }} +├── .github <- Configuration folder for CI/CD using GitHub Actions. 
{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} The CI/CD workflows deploy ML resources defined in the `./resources/*` folder with databricks CLI bundles.{{ end }} +{{- else if (eq .input_cicd_platform `azure_devops`) }} +├── .azure <- Configuration folder for CI/CD using Azure DevOps Pipelines. {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} The CI/CD workflows deploy ML resources defined in the `./resources/*` folder with databricks CLI bundles.{{ end }} +{{- else if (eq .input_cicd_platform `gitlab`) }} +├── .gitlab <- Configuration folder for CI/CD using Gitlab Pipelines. {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} The CI/CD workflows deploy ML resources defined in the `./resources/*` folder with databricks CLI bundles.{{ end }} +{{- end }} +│ +├── docs <- Contains documentation for the repo. +│ +├── cicd.tar.gz <- Contains CI/CD bundle that should be deployed by deploy-cicd.yml to set up CI/CD for projects. +``` + +## Using this repo + +The table below links to detailed docs explaining how to use this repo for different use cases. + +{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} +This project comes with example ML code to train, validate and deploy a regression model to predict NYC taxi fares. +If you're a data scientist just getting started with this repo for a brand new ML project, we recommend +adapting the provided example code to your ML problem, then making and +testing ML code changes on Databricks or your local machine. Follow the instructions from +the [project README](./{{template `project_name_alphanumeric_underscore` .}}/README.md). +{{ end }} + +When you're ready to deploy production training/inference +pipelines, ask your ops team to follow the [MLOps setup guide](docs/mlops-setup.md) to configure CI/CD and deploy +production ML pipelines. 
+ +After that, follow the [ML pull request guide](docs/ml-pull-request.md) +{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} and [ML resource config guide]({{template `project_name_alphanumeric_underscore` .}}/resources/README.md) {{ end }} to propose, test, and deploy changes to production ML code (e.g. update model parameters) +or pipeline resources (e.g. use a larger instance type for model training) via pull request. + +| Role | Goal | Docs | +|-------------------------------|------------------------------------------------------------------------------|---------------------------------------------------------------------------------------------------------------------------------------------------------------------| +| Data Scientist | Get started writing ML code for a brand new project | {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[project README](./{{template `project_name_alphanumeric_underscore` .}}/README.md) {{ else }} README when project is initialized {{ end }}| +| MLOps / DevOps | Set up CI/CD for the current ML project | [MLOps setup guide](docs/mlops-setup.md) | +| Data Scientist | Update production ML code (e.g. model training logic) for an existing project | [ML pull request guide](docs/ml-pull-request.md) | +| Data Scientist | Modify production model ML resources, e.g. model training or inference jobs | {{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}}[ML resource config guide]({{template `project_name_alphanumeric_underscore` .}}/resources/README.md) {{ else }} ML resources README when project is initialized {{ end }} | + +## Setting up CI/CD +This stack comes with a workflow to set up CI/CD for projects that can be found in +{{ if (or (eq .input_cicd_platform `github_actions`) (eq .input_cicd_platform `github_actions_for_github_enterprise_servers`)) }} +`.github/workflows/deploy-cicd.yml`. +{{ else if (eq .input_cicd_platform `azure_devops`)}} +`.azure/devops-pipelines/deploy-cicd.yml`. 
+{{ else if (eq .input_cicd_platform `gitlab`)}} +`.gitlab/pipelines/{{.input_project_name}}-triggers-cicd.yml`. +{{ end }} + +To set up CI/CD for projects that were created through MLOps Stacks with the `Project_Only` parameter, +run the above mentioned workflow, specifying the `project_name` as a parameter. For example, for the monorepo case: + +1. Setup your repository by initializing MLOps Stacks via Databricks CLI with the `CICD_and_Project` or `CICD_Only` parameter. +2. Follow the [MLOps Setup Guide](./docs/mlops-setup.md) to setup authentication and get the repo ready for CI/CD. +3. Create a new project by initializing MLOps Stacks again but this time with the `Project_Only` parameter. +4. Run the `deploy-cicd.yml` workflow with the `project_name` parameter set to the name of the project you want to set up CI/CD for. + +{{ if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} +NOTE: This project has already been initialized with an instantiation of the above workflow, so there's no +need to run it again for project `{{.input_project_name}}`. +{{ end -}} diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl new file mode 100644 index 00000000000..de207874329 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/databricks.yml.tmpl @@ -0,0 +1,70 @@ +# The name of the bundle. run `databricks bundle schema` to see the full bundle settings schema. +bundle: + # Do not modify the below line, this autogenerated field is used by the Databricks backend. + uuid: {{ bundle_uuid }} + + name: {{ .input_project_name }} + +variables: + experiment_name: + description: Experiment name for the model training. 
+ default: /Users/${workspace.current_user.userName}/${bundle.target}-{{template `experiment_base_name` .}} + model_name: + description: Model name for the model training. + {{ if (eq .input_include_models_in_unity_catalog `no`) }}default: ${bundle.target}-{{template `model_name` .}} + {{- else -}}default: {{template `model_name` .}}{{end}} + {{ if (eq .input_include_models_in_unity_catalog `yes`) }}catalog_name: + description: The catalog name to save the trained model{{end}} + +include: + # Resources folder contains ML artifact resources for the ML project that defines model and experiment + # And workflows resources for the ML project including model training -> validation -> deployment, + # {{- if (eq .input_include_feature_store `yes`) }} feature engineering, {{ end }} batch inference, quality monitoring, metric refresh, alerts and triggering retraining + - ./resources/batch-inference-workflow-resource.yml + - ./resources/ml-artifacts-resource.yml + - ./resources/model-workflow-resource.yml + {{- if (eq .input_include_feature_store `yes`) }} + - ./resources/feature-engineering-workflow-resource.yml + {{- end }} + # TODO: uncomment once monitoring inference table has been created + # - ./resources/monitoring-resource.yml + +# Deployment Target specific values for workspace +targets: + dev: {{ if (eq .input_include_models_in_unity_catalog `yes`)}} # UC Catalog Name {{ end }} + mode: development + default: true + {{- if (eq .input_include_models_in_unity_catalog `yes`)}} + variables: + catalog_name: dev + {{- end}} + workspace: + # TODO: add dev workspace URL + host: + +{{- if (eq .input_setup_cicd_and_project `CICD_and_Project`)}} + + staging: + {{- if (eq .input_include_models_in_unity_catalog `yes`) }} + variables: + catalog_name: {{ .input_staging_catalog_name }} + {{- end}} + workspace: + host: {{template `databricks_staging_workspace_host` .}} + + prod: + {{- if (eq .input_include_models_in_unity_catalog `yes`)}} + variables: + catalog_name: {{ 
.input_prod_catalog_name }} + {{- end}} + workspace: + host: {{template `databricks_prod_workspace_host` .}} + + test: + {{- if (eq .input_include_models_in_unity_catalog `yes`)}} + variables: + catalog_name: {{ .input_test_catalog_name }} + {{- end}} + workspace: + host: {{template `databricks_staging_workspace_host` .}} +{{ end }} \ No newline at end of file diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl new file mode 100644 index 00000000000..99bff2b99a5 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/batch_inference/notebooks/BatchInference.py.tmpl @@ -0,0 +1,111 @@ +# Databricks notebook source +################################################################################## +# Batch Inference Notebook +# +# This notebook is an example of applying a model for batch inference against an input delta table, +# It is configured and can be executed as the batch_inference_job in the batch_inference_job workflow defined under +# ``{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml`` +# +# Parameters: +# +# * env (optional) - String name of the current environment (dev, staging, or prod). Defaults to "dev" +# * input_table_name (required) - Delta table name containing your input data. +# * output_table_name (required) - Delta table name where the predictions will be written to. +# Note that this will create a new version of the Delta table if +# the table already exists +# * model_name (required) - The name of the model to be used in batch inference. 
+################################################################################## + + +# List of input args needed to run the notebook as a job. +# Provide them via DB widgets or notebook arguments. +# +# Name of the current environment +dbutils.widgets.dropdown("env", "dev", ["dev", "staging", "prod"], "Environment Name") +# A Hive-registered Delta table containing the input features. +dbutils.widgets.text("input_table_name", "", label="Input Table Name") +# Delta table to store the output predictions. +dbutils.widgets.text("output_table_name", "", label="Output Table Name") +{{- if (eq .input_include_models_in_unity_catalog "no") }} +# Batch inference model name +dbutils.widgets.text( + "model_name", "dev-{{template `model_name` .}}", label="Model Name" +) +{{else}} +# Unity Catalog registered model name to use for the trained model. +dbutils.widgets.text( + "model_name", "dev.{{ .input_schema_name }}.{{template `model_name` .}}", label="Full (Three-Level) Model Name" +){{end}} + +# COMMAND ---------- + +import os + +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path + +# COMMAND ---------- + +# MAGIC %pip install -r ../../../requirements.txt + +# COMMAND ---------- + +dbutils.library.restartPython() + +# COMMAND ---------- + +import sys +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path +%cd .. 
+sys.path.append("../..") + +# COMMAND ---------- + +# DBTITLE 1,Define input and output variables +{{- if (eq .input_include_models_in_unity_catalog "no") }} +from utils import get_deployed_model_stage_for_env{{end}} + +env = dbutils.widgets.get("env") +input_table_name = dbutils.widgets.get("input_table_name") +output_table_name = dbutils.widgets.get("output_table_name") +model_name = dbutils.widgets.get("model_name") +assert input_table_name != "", "input_table_name notebook parameter must be specified" +assert output_table_name != "", "output_table_name notebook parameter must be specified" +assert model_name != "", "model_name notebook parameter must be specified" +{{- if (eq .input_include_models_in_unity_catalog "no") }} +stage = get_deployed_model_stage_for_env(env) +model_uri = f"models:/{model_name}/{stage}"{{else}} +alias = "champion" +model_uri = f"models:/{model_name}@{alias}"{{end}} + +# COMMAND ---------- + +from mlflow import MlflowClient +{{ if (eq .input_include_models_in_unity_catalog "no") }} +# Get model version from stage +model_version_infos = MlflowClient().search_model_versions("name = '%s'" % model_name) +model_version = max( + int(version.version) + for version in model_version_infos + if version.current_stage == stage +){{else}} +# Get model version from alias +client = MlflowClient(registry_uri="databricks-uc") +model_version = client.get_model_version_by_alias(model_name, alias).version{{end}} + +# COMMAND ---------- + +# Get datetime +from datetime import datetime + +ts = datetime.now().strftime("%Y-%m-%d %H:%M:%S") + +# COMMAND ---------- +# DBTITLE 1,Load model and run inference + +from predict import predict_batch + +predict_batch(spark, model_uri, input_table_name, output_table_name, model_version, ts) +dbutils.notebook.exit(output_table_name) diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` 
.}}/deployment/model_deployment/notebooks/ModelDeployment.py.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/ModelDeployment.py.tmpl new file mode 100644 index 00000000000..64f526ce0bc --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/deployment/model_deployment/notebooks/ModelDeployment.py.tmpl @@ -0,0 +1,52 @@ +# Databricks notebook source +################################################################################## +# Helper notebook to transition the model stage. This notebook is run +# after the Train.py notebook as part of a multi-task job, in order to transition model +# to target stage after training completes. +# +# Note that we deploy the model to the stage in MLflow Model Registry equivalent to the +# environment in which the multi-task job is executed (e.g. deploy the trained model to +# stage=Production if triggered in the prod environment). In a practical setting, we would +# recommend enabling the model validation step between model training and automatically +# registering the model to the Production stage in prod. +# +# This notebook has the following parameters: +# +# * env (required) - String name of the current environment for model deployment, which decides the target stage. +# * model_uri (required) - URI of the model to deploy. Must be in the format "models:/<model_name>/<version>", as described in +# https://www.mlflow.org/docs/latest/model-registry.html#fetching-an-mlflow-model-from-the-model-registry +# This parameter is read as a task value +# ({{ template `generate_doc_link` (map (pair "cloud" .input_cloud) (pair "path" "dev-tools/databricks-utils.html")) }}), +# rather than as a notebook widget. That is, we assume a preceding task (the Train.py +# notebook) has set a task value with key "model_uri". 
+################################################################################## + +# List of input args needed to run the notebook as a job. +# Provide them via DB widgets or notebook arguments. +# +# Name of the current environment +dbutils.widgets.dropdown("env", "None", ["None", "staging", "prod"], "Environment Name") + +# COMMAND ---------- + +import os +import sys +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path +%cd .. +sys.path.append("../..") + +# COMMAND ---------- + +from deploy import deploy + +model_uri = dbutils.jobs.taskValues.get("Train", "model_uri", debugValue="") +env = dbutils.widgets.get("env") +assert env != "None", "env notebook parameter must be specified" +assert model_uri != "", "model_uri notebook parameter must be specified" +deploy(model_uri, env) + +# COMMAND ---------- +print( + f"Successfully completed model deployment for {model_uri}" +) diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl new file mode 100644 index 00000000000..e11b630656f --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/batch-inference-workflow-resource.yml.tmpl @@ -0,0 +1,46 @@ +new_cluster: &new_cluster + new_cluster: + num_workers: 3 + spark_version: 15.3.x-cpu-ml-scala2.12 + node_type_id: {{template `cloud_specific_node_type_id` .}} + data_security_mode: "SINGLE_USER" + custom_tags: + clusterSource: mlops-stacks_{{template `stacks_version` .}} + +common_permissions: &permissions + permissions: + - level: CAN_VIEW + 
group_name: users + +resources: + jobs: + batch_inference_job: + name: ${bundle.target}-{{ .input_project_name }}-batch-inference-job + tasks: + - task_key: batch_inference_job + <<: *new_cluster + notebook_task: + notebook_path: ../deployment/batch_inference/notebooks/BatchInference.py + base_parameters: + env: ${bundle.target} + {{ if (eq .input_include_feature_store `yes`) }}{{ if (eq .input_include_models_in_unity_catalog `yes`) }}input_table_name: ${bundle.target}.{{ .input_schema_name }}.feature_store_inference_input # TODO: create input table for inference + {{- else -}}input_table_name: hive_metastore.default.taxi_scoring_sample_feature_store_inference_input{{ end }} + {{- else -}}input_table_name: taxi_scoring_sample # TODO: create input table for inference{{ end }} + {{ if (eq .input_include_models_in_unity_catalog `no`) }}output_table_name: ${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_predictions + {{- else -}}output_table_name: ${var.catalog_name}.{{ .input_schema_name }}.predictions{{ end }} + {{ if (eq .input_include_models_in_unity_catalog `no`) }}model_name: ${var.model_name} + {{- else -}}model_name: ${var.catalog_name}.{{ .input_schema_name }}.${var.model_name}{{ end }} + # git source information of current ML resource deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + + schedule: + quartz_cron_expression: "0 0 11 * * ?" # daily at 11am + timezone_id: UTC + <<: *permissions + # If you want to turn on notifications for this job, please uncomment the below code, + # and provide a list of emails to the on_failure argument. 
+ # + # email_notifications: + # on_failure: + # - first@company.com + # - second@company.com diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl new file mode 100644 index 00000000000..64ceba20123 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/ml-artifacts-resource.yml.tmpl @@ -0,0 +1,34 @@ +# Allow users to read the experiment {{ if (eq .input_include_models_in_unity_catalog `no`) }}and the model{{end}} +common_permissions: &permissions + permissions: + - level: CAN_READ + group_name: users + +{{ if (eq .input_include_models_in_unity_catalog `yes`) }}# Allow users to execute models in Unity Catalog +grants: &grants + grants: + - privileges: + - EXECUTE + principal: {{ .input_unity_catalog_read_user_group }}{{end}} + +# Defines model and experiments +resources: + {{ if (eq .input_include_models_in_unity_catalog `no`) }}models: + model: + name: ${var.model_name} + description: MLflow registered model for the "{{ .input_project_name }}" ML Project for ${bundle.target} deployment target. + <<: *permissions + {{- else -}} + registered_models: + model: + name: ${var.model_name} + catalog_name: ${var.catalog_name} + schema_name: {{ .input_schema_name }} + comment: Registered model in Unity Catalog for the "{{ .input_project_name }}" ML Project for ${bundle.target} deployment target. + <<: *grants{{end}} + + experiments: + experiment: + name: ${var.experiment_name} + <<: *permissions + description: MLflow Experiment used to track runs for {{ .input_project_name }} project. 
diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl new file mode 100644 index 00000000000..3c063b759a1 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml.tmpl @@ -0,0 +1,123 @@ +new_cluster: &new_cluster + new_cluster: + num_workers: 3 + spark_version: 15.3.x-cpu-ml-scala2.12 + node_type_id: {{template `cloud_specific_node_type_id` .}} + data_security_mode: "SINGLE_USER" + custom_tags: + clusterSource: mlops-stacks_{{template `stacks_version` .}} + +common_permissions: &permissions + permissions: + - level: CAN_VIEW + group_name: users + +resources: + jobs: + model_training_job: + name: ${bundle.target}-{{ .input_project_name }}-model-training-job + job_clusters: + - job_cluster_key: model_training_job_cluster + <<: *new_cluster + tasks: + - task_key: Train + job_cluster_key: model_training_job_cluster + {{ if and (eq .input_include_feature_store `no`) (eq .input_include_mlflow_recipes `no`) }}notebook_task: + notebook_path: ../training/notebooks/Train.py + base_parameters: + env: ${bundle.target} + # TODO: Update training_data_path + training_data_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled + experiment_name: ${var.experiment_name} + {{ if (eq .input_include_models_in_unity_catalog `no`) }}model_name: ${var.model_name} + {{- else -}}model_name: ${var.catalog_name}.{{ .input_schema_name }}.${var.model_name}{{ end }} + # git source information of current ML resource deployment. 
It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + {{ else if (eq .input_include_feature_store `yes`) }}notebook_task: + notebook_path: ../training/notebooks/TrainWithFeatureStore.py + base_parameters: + env: ${bundle.target} + # TODO: Update training_data_path + training_data_path: /databricks-datasets/nyctaxi-with-zipcodes/subsampled + experiment_name: ${var.experiment_name} + {{ if (eq .input_include_models_in_unity_catalog `no`) }}model_name: ${var.model_name} + {{- else -}}model_name: ${var.catalog_name}.{{ .input_schema_name }}.${var.model_name}{{ end }} + {{ if (eq .input_include_models_in_unity_catalog `no`) }}pickup_features_table: feature_store_taxi_example.${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_trip_pickup_features + {{- else -}}pickup_features_table: ${var.catalog_name}.{{ .input_schema_name }}.trip_pickup_features{{ end }} + {{ if (eq .input_include_models_in_unity_catalog `no`) }}dropoff_features_table: feature_store_taxi_example.${bundle.target}_{{template `project_name_alphanumeric_underscore` .}}_trip_dropoff_features + {{- else -}}dropoff_features_table: ${var.catalog_name}.{{ .input_schema_name }}.trip_dropoff_features{{ end }} + # git source information of current ML resource deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + {{- else -}}notebook_task: + notebook_path: ../training/notebooks/TrainWithMLflowRecipes.py + base_parameters: + env: ${bundle.target} + # git source information of current ML resource deployment. 
It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit}{{ end }} + - task_key: ModelValidation + job_cluster_key: model_training_job_cluster + depends_on: + - task_key: Train + notebook_task: + notebook_path: ../validation/notebooks/ModelValidation.py + base_parameters: + {{- if (eq .input_include_mlflow_recipes `yes`) }} + env: ${bundle.target}{{ end }} + experiment_name: ${var.experiment_name} + # The `run_mode` defines whether model validation is enabled or not. + # It can be one of the three values: + # `disabled` : Do not run the model validation notebook. + # `dry_run` : Run the model validation notebook. Ignore failed model validation rules and proceed to move + # model to Production stage. + # `enabled` : Run the model validation notebook. Move model to Production stage only if all model validation + # rules are passing. + # TODO: update run_mode + run_mode: dry_run + # Whether to load the current registered "Production" stage model as baseline. + # Baseline model is a requirement for relative change and absolute change validation thresholds. + # TODO: update enable_baseline_comparison + enable_baseline_comparison: "false" + # Please refer to data parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate + # TODO: update validation_input + validation_input: SELECT * FROM delta.`dbfs:/databricks-datasets/nyctaxi-with-zipcodes/subsampled` + {{- if (eq .input_include_mlflow_recipes `no`) }} + # A string describing the model type. The model type can be either "regressor" and "classifier". + # Please refer to model_type parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate + # TODO: update model_type + model_type: regressor + # The string name of a column from data that contains evaluation labels. 
+ # Please refer to targets parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate + # TODO: targets + targets: fare_amount{{ end }} + # Specifies the name of the function in {{ .input_project_name }}/training_validation_deployment/validation/validation.py that returns custom metrics. + # TODO(optional): custom_metrics_loader_function + custom_metrics_loader_function: custom_metrics + # Specifies the name of the function in {{ .input_project_name }}/training_validation_deployment/validation/validation.py that returns model validation thresholds. + # TODO(optional): validation_thresholds_loader_function + validation_thresholds_loader_function: validation_thresholds + # Specifies the name of the function in {{ .input_project_name }}/training_validation_deployment/validation/validation.py that returns evaluator_config. + # TODO(optional): evaluator_config_loader_function + evaluator_config_loader_function: evaluator_config + # git source information of current ML resource deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + - task_key: ModelDeployment + job_cluster_key: model_training_job_cluster + depends_on: + - task_key: ModelValidation + notebook_task: + notebook_path: ../deployment/model_deployment/notebooks/ModelDeployment.py + base_parameters: + env: ${bundle.target} + # git source information of current ML resource deployment. It will be persisted as part of the workflow run + git_source_info: url:${bundle.git.origin_url}; branch:${bundle.git.branch}; commit:${bundle.git.commit} + schedule: + quartz_cron_expression: "0 0 9 * * ?" # daily at 9am + timezone_id: UTC + <<: *permissions + # If you want to turn on notifications for this job, please uncomment the below code, + # and provide a list of emails to the on_failure argument. 
+ # + # email_notifications: + # on_failure: + # - first@company.com + # - second@company.com diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl new file mode 100644 index 00000000000..eb9091579e5 --- /dev/null +++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/training/notebooks/Train.py.tmpl @@ -0,0 +1,176 @@ +# Databricks notebook source +################################################################################## +# Model Training Notebook +# +# This notebook shows an example of a Model Training pipeline using Delta tables. +# It is configured and can be executed as the "Train" task in the model_training_job workflow defined under +# ``{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml`` +# +# Parameters: +# * env (required): - Environment the notebook is run in (staging, or prod). Defaults to "staging". +# * training_data_path (required) - Path to the training data. +# * experiment_name (required) - MLflow experiment name for the training runs. Will be created if it doesn't exist. +{{- if (eq .input_include_models_in_unity_catalog "no") }} +# * model_name (required) - MLflow registered model name to use for the trained model. Will be created if it +# * doesn't exist. +{{else}} +# * model_name (required) - Three-level name (<catalog>.<schema>.<model_name>) to register the trained model in Unity Catalog. 
+# +{{end -}} +################################################################################## + +# COMMAND ---------- + +# MAGIC %load_ext autoreload +# MAGIC %autoreload 2 + +# COMMAND ---------- + +import os +notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get()) +%cd $notebook_path + +# COMMAND ---------- + +# MAGIC %pip install -r ../../requirements.txt + +# COMMAND ---------- + +dbutils.library.restartPython() + +# COMMAND ---------- + +# DBTITLE 1, Notebook arguments +# List of input args needed to run this notebook as a job. +# Provide them via DB widgets or notebook arguments. + +# Notebook Environment +dbutils.widgets.dropdown("env", "staging", ["staging", "prod"], "Environment Name") +env = dbutils.widgets.get("env") + +# Path to the Hive-registered Delta table containing the training data. +dbutils.widgets.text( + "training_data_path", + "/databricks-datasets/nyctaxi-with-zipcodes/subsampled", + label="Path to the training data", +) + +# MLflow experiment name. +dbutils.widgets.text( + "experiment_name", + f"/dev-{{template `experiment_base_name` .}}", + label="MLflow experiment name", +) + +{{- if (eq .input_include_models_in_unity_catalog "no") }} +# MLflow registered model name to use for the trained model. +dbutils.widgets.text( + "model_name", "dev-{{template `model_name` .}}", label="Model Name" +) + +{{else}} +# Unity Catalog registered model name to use for the trained model. 
+dbutils.widgets.text( + "model_name", "dev.{{ .input_schema_name }}.{{template `model_name` .}}", label="Full (Three-Level) Model Name" +) + +{{end -}} + +# COMMAND ---------- + +# DBTITLE 1,Define input and output variables +input_table_path = dbutils.widgets.get("training_data_path") +experiment_name = dbutils.widgets.get("experiment_name") +model_name = dbutils.widgets.get("model_name") + +# COMMAND ---------- + +# DBTITLE 1, Set experiment +import mlflow + +mlflow.set_experiment(experiment_name) + +{{- if (eq .input_include_models_in_unity_catalog "yes") }} +mlflow.set_registry_uri('databricks-uc') + +{{end -}} + +# COMMAND ---------- + +# DBTITLE 1, Load raw data +training_df = spark.read.format("delta").load(input_table_path) +training_df.display() + +# COMMAND ---------- + +# DBTITLE 1, Helper function +from mlflow.tracking import MlflowClient +import mlflow.pyfunc + + +def get_latest_model_version(model_name): + latest_version = 1 + mlflow_client = MlflowClient() + for mv in mlflow_client.search_model_versions(f"name='{model_name}'"): + version_int = int(mv.version) + if version_int > latest_version: + latest_version = version_int + return latest_version + + +# COMMAND ---------- + +# MAGIC %md +# MAGIC Train a LightGBM model on the data, then log and register the model with MLflow. + +# COMMAND ---------- + +# DBTITLE 1, Train model +import mlflow +from sklearn.model_selection import train_test_split +import lightgbm as lgb +import mlflow.lightgbm + +# Collect data into a Pandas array for training. Since the timestamp columns would likely +# cause the model to overfit the data, exclude them to avoid training on them. 
+columns = [col for col in training_df.columns if col not in ['tpep_pickup_datetime', 'tpep_dropoff_datetime']] +data = training_df.toPandas()[columns] + +train, test = train_test_split(data, random_state=123) +X_train = train.drop(["fare_amount"], axis=1) +X_test = test.drop(["fare_amount"], axis=1) +y_train = train.fare_amount +y_test = test.fare_amount + +mlflow.lightgbm.autolog() +train_lgb_dataset = lgb.Dataset(X_train, label=y_train.values) +test_lgb_dataset = lgb.Dataset(X_test, label=y_test.values) + +param = {"num_leaves": 32, "objective": "regression", "metric": "rmse"} +num_rounds = 100 + +# Train a lightGBM model +model = lgb.train(param, train_lgb_dataset, num_rounds) + +# COMMAND ---------- + +# DBTITLE 1, Log model and return output. +# Take the first row of the training dataset as the model input example. +input_example = X_train.iloc[[0]] + +# Log the trained model with MLflow +mlflow.lightgbm.log_model( + model, + artifact_path="lgb_model", + # The signature is automatically inferred from the input example and its predicted output. + input_example=input_example, + registered_model_name=model_name +) + +# The returned model URI is needed by the model deployment notebook. 
+model_version = get_latest_model_version(model_name)  # presumably the version log_model just registered
+model_uri = f"models:/{model_name}/{model_version}"
+dbutils.jobs.taskValues.set("model_uri", model_uri)  # read by the validation notebook via taskValues.get("Train", ...)
+dbutils.jobs.taskValues.set("model_name", model_name)
+dbutils.jobs.taskValues.set("model_version", model_version)
+dbutils.notebook.exit(model_uri)
diff --git a/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl
new file mode 100644
index 00000000000..c2852616c65
--- /dev/null
+++ b/acceptance/bundle/deploy/mlops-stacks/template/template/{{.input_root_dir}}/{{template `project_name_alphanumeric_underscore` .}}/validation/notebooks/ModelValidation.py.tmpl
@@ -0,0 +1,405 @@
+# Databricks notebook source
+##################################################################################
+# Model Validation Notebook
+##
+# This notebook uses mlflow model validation API to run model validation after training and registering a model
+# in model registry, before deploying it to the {{- if (eq .input_include_models_in_unity_catalog "no") }} "Production" stage{{else}} "champion" alias{{end -}}.
+#
+# It runs as part of CD and by an automated model training job -> validation -> deployment job defined under ``{{template `project_name_alphanumeric_underscore` .}}/resources/model-workflow-resource.yml``
+#
+#
+# Parameters:
+#
+# * env - Name of the environment the notebook is run in (staging, or prod). Defaults to "prod".
+# * `run_mode` - The `run_mode` defines whether model validation is enabled or not. It can be one of the three values:
+# * `disabled` : Do not run the model validation notebook.
+# * `dry_run` : Run the model validation notebook. Ignore failed model validation rules and proceed to move
+# model to the {{- if (eq .input_include_models_in_unity_catalog "no") }} "Production" stage{{else}} "champion" alias{{end -}}.
+# * `enabled` : Run the model validation notebook. Move model to the {{- if (eq .input_include_models_in_unity_catalog "no") }} "Production" stage {{else}} "champion" alias {{end -}} only if all model validation
+# rules are passing.
+{{- if (eq .input_include_models_in_unity_catalog "no") }}
+# * enable_baseline_comparison - Whether to load the current registered "Production" stage model as baseline.
+{{else}}
+# * enable_baseline_comparison - Whether to load the current registered "champion" model as baseline.
+{{end -}}
+# Baseline model is a requirement for relative change and absolute change validation thresholds.
+# * validation_input - Validation input. Please refer to data parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate
+{{ if (eq .input_include_mlflow_recipes `no`) }}# * model_type - A string describing the model type. The model type can be either "regressor" or "classifier".
+# Please refer to model_type parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate
+# * targets - The string name of a column from data that contains evaluation labels.
+# Please refer to targets parameter in mlflow.evaluate documentation https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate{{ end }}
+# * custom_metrics_loader_function - Specifies the name of the function in {{ .input_project_name }}/validation/validation.py that returns custom metrics.
+# * validation_thresholds_loader_function - Specifies the name of the function in {{ .input_project_name }}/validation/validation.py that returns model validation thresholds.
+#
+# For details on mlflow evaluate API, see doc https://mlflow.org/docs/latest/python_api/mlflow.html#mlflow.evaluate
+# For details and examples about performing model validation, see the Model Validation documentation https://mlflow.org/docs/latest/models.html#model-validation
+#
+##################################################################################
+
+# COMMAND ----------
+
+# MAGIC %load_ext autoreload
+# MAGIC %autoreload 2
+
+# COMMAND ----------
+
+import os
+notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get())
+%cd $notebook_path
+
+# COMMAND ----------
+
+# MAGIC %pip install -r ../../requirements.txt
+
+# COMMAND ----------
+
+dbutils.library.restartPython()  # restart Python after the %pip install cell above
+
+# COMMAND ----------
+
+import os  # NOTE(review): presumably repeated because restartPython() resets interpreter state - confirm
+notebook_path = '/Workspace/' + os.path.dirname(dbutils.notebook.entry_point.getDbutils().notebook().getContext().notebookPath().get())
+%cd $notebook_path
+%cd ../
+
+# COMMAND ----------
+
+{{ if (eq .input_include_mlflow_recipes `yes`) }}dbutils.widgets.dropdown(
+    "env", "prod", ["staging", "prod"], "Environment(for input data)"
+){{ end }}
+dbutils.widgets.text(
+    "experiment_name",
+    "/dev-{{template `experiment_base_name` .}}",
+    "Experiment Name",
+)
+dbutils.widgets.dropdown("run_mode", "disabled", ["disabled", "dry_run", "enabled"], "Run Mode")
+dbutils.widgets.dropdown("enable_baseline_comparison", "false", ["true", "false"], "Enable Baseline Comparison")
+dbutils.widgets.text("validation_input", "SELECT * FROM delta.`dbfs:/databricks-datasets/nyctaxi-with-zipcodes/subsampled`", "Validation Input")
+{{ if (eq .input_include_mlflow_recipes `no`) }}
+dbutils.widgets.text("model_type", "regressor", "Model Type")
+dbutils.widgets.text("targets", "fare_amount", "Targets"){{ end }}
+dbutils.widgets.text("custom_metrics_loader_function", "custom_metrics", "Custom Metrics Loader Function")
+dbutils.widgets.text("validation_thresholds_loader_function", "validation_thresholds", "Validation Thresholds Loader Function")
+dbutils.widgets.text("evaluator_config_loader_function", "evaluator_config", "Evaluator Config Loader Function")
+{{- if (eq .input_include_models_in_unity_catalog "no") }}
+dbutils.widgets.text("model_name", "dev-{{template `model_name` .}}", "Model Name")
+{{else}}
+dbutils.widgets.text("model_name", "dev.{{ .input_schema_name }}.{{template `model_name` .}}", "Full (Three-Level) Model Name")
+{{end -}}
+dbutils.widgets.text("model_version", "", "Candidate Model Version")
+
+# COMMAND ----------
+run_mode = dbutils.widgets.get("run_mode").lower()
+assert run_mode == "disabled" or run_mode == "dry_run" or run_mode == "enabled"  # fail fast on an unknown run_mode
+
+if run_mode == "disabled":
+    print(
+        "Model validation is in DISABLED mode. Exit model validation without blocking model deployment."
+    )
+    dbutils.notebook.exit(0)  # leave the notebook here; validation is skipped in this mode
+dry_run = run_mode == "dry_run"  # consulted by the except branch of the evaluate cell below
+
+if dry_run:
+    print(
+        "Model validation is in DRY_RUN mode. Validation threshold validation failures will not block model deployment."
+    )
+else:
+    print(
+        "Model validation is in ENABLED mode. Validation threshold validation failures will block model deployment."
+    )
+
+# COMMAND ----------
+
+import importlib
+import mlflow
+import os
+import tempfile
+import traceback
+{{ if (eq .input_include_mlflow_recipes `yes`) }}from mlflow.recipes.utils import (
+    get_recipe_config,
+    get_recipe_name,
+    get_recipe_root_path,
+){{ end }}
+from mlflow.tracking.client import MlflowClient
+{{ if (eq .input_include_models_in_unity_catalog "no") }}
+client = MlflowClient()
+{{else}}
+client = MlflowClient(registry_uri="databricks-uc")
+mlflow.set_registry_uri('databricks-uc')
+{{ end }}
+# set experiment
+experiment_name = dbutils.widgets.get("experiment_name")
+mlflow.set_experiment(experiment_name)
+{{ if (eq .input_include_mlflow_recipes `yes`) }}env = dbutils.widgets.get("env")
+assert env, "env notebook parameter must be specified"
+
+def get_model_type_from_recipe():  # maps the recipe's problem type to "regressor" / "classifier"
+    try:
+        recipe_config = get_recipe_config("../training", f"databricks-{env}")
+        problem_type = recipe_config.get("recipe").split("/")[0]  # text before the first "/" of the recipe value
+        if problem_type.lower() == "regression":
+            return "regressor"
+        elif problem_type.lower() == "classification":
+            return "classifier"
+        else:
+            raise Exception(f"Unsupported recipe {recipe_config}")
+    except Exception as ex:
+        print(f"Not able to get model type from mlflow recipe databricks-{env}.")
+        raise  # bare raise re-raises the active exception with its traceback intact
+
+def get_targets_from_recipe():  # returns the recipe config's "target_col" entry
+    try:
+        recipe_config = get_recipe_config("../training", f"databricks-{env}")
+        return recipe_config.get("target_col")
+    except Exception as ex:
+        print(f"Not able to get targets from mlflow recipe databricks-{env}.")
+        raise{{ end }}
+# set model evaluation parameters that can be inferred from the job
+model_uri = dbutils.jobs.taskValues.get("Train", "model_uri", debugValue="")
+model_name = dbutils.jobs.taskValues.get("Train", "model_name", debugValue="")
+model_version = dbutils.jobs.taskValues.get("Train", "model_version", debugValue="")
+
+if model_uri == "":  # nothing from the Train task: fall back to the notebook widgets
+    model_name = dbutils.widgets.get("model_name")
+    model_version = dbutils.widgets.get("model_version")
+    model_uri = "models:/" + model_name + "/" + model_version
+{{ if (eq .input_include_models_in_unity_catalog "no") }}
+baseline_model_uri = "models:/" + model_name + "/Production"
+{{else}}
+baseline_model_uri = "models:/" + model_name + "@champion"
+{{ end }}
+evaluators = "default"
+assert model_uri != "", "model_uri notebook parameter must be specified"
+assert model_name != "", "model_name notebook parameter must be specified"
+assert model_version != "", "model_version notebook parameter must be specified"
+
+# COMMAND ----------
+
+# take input
+enable_baseline_comparison = dbutils.widgets.get("enable_baseline_comparison")
+
+{{ if (eq .input_include_feature_store `yes`) }}
+enable_baseline_comparison = "false"
+print(
+    "Currently baseline model comparison is not supported for models registered with feature store. Please refer to "
+    "issue https://github.com/databricks/mlops-stacks/issues/70 for more details."
+)
+{{ end }}
+assert enable_baseline_comparison == "true" or enable_baseline_comparison == "false"
+enable_baseline_comparison = enable_baseline_comparison == "true"  # widget string -> bool
+
+validation_input = dbutils.widgets.get("validation_input")
+assert validation_input
+data = spark.sql(validation_input)  # the widget value is executed as a SQL query
+
+{{ if (eq .input_include_mlflow_recipes `no`) }}model_type = dbutils.widgets.get("model_type")
+targets = dbutils.widgets.get("targets")
+{{ else }}model_type = get_model_type_from_recipe()
+targets = get_targets_from_recipe(){{ end }}
+assert model_type
+assert targets
+
+custom_metrics_loader_function_name = dbutils.widgets.get("custom_metrics_loader_function")
+validation_thresholds_loader_function_name = dbutils.widgets.get("validation_thresholds_loader_function")
+evaluator_config_loader_function_name = dbutils.widgets.get("evaluator_config_loader_function")
+assert custom_metrics_loader_function_name
+assert validation_thresholds_loader_function_name
+assert evaluator_config_loader_function_name
+custom_metrics_loader_function = getattr(
+    importlib.import_module("validation"), custom_metrics_loader_function_name
+)
+validation_thresholds_loader_function = getattr(
+    importlib.import_module("validation"), validation_thresholds_loader_function_name
+)
+evaluator_config_loader_function = getattr(
+    importlib.import_module("validation"), evaluator_config_loader_function_name
+)
+custom_metrics = custom_metrics_loader_function()
+validation_thresholds = validation_thresholds_loader_function()
+evaluator_config = evaluator_config_loader_function()
+
+# COMMAND ----------
+
+# helper methods
+def get_run_link(run_info):  # markdown link built from the run's experiment_id and run_id
+    return "[Run](#mlflow/experiments/{0}/runs/{1})".format(
+        run_info.experiment_id, run_info.run_id
+    )
+
+
+def get_training_run(model_name, model_version):  # MLflow run recorded on the given model version
+    version = client.get_model_version(model_name, model_version)
+    return mlflow.get_run(run_id=version.run_id)
+
+
+def generate_run_name(training_run):  # "<training run name>-validation", or None without a training run
+    return None if not training_run else training_run.info.run_name + "-validation"
+
+
+def generate_description(training_run):  # one-line link back to the training run, or None without one
+    return (
+        None
+        if not training_run
+        else "Model Training Details: {0}\n".format(get_run_link(training_run.info))
+    )
+
+
+def log_to_model_description(run, success):  # appends the validation outcome to the model version description
+    run_link = get_run_link(run.info)
+    description = client.get_model_version(model_name, model_version).description or ""  # treat a missing (None) description as empty
+    status = "SUCCESS" if success else "FAILURE"
+    if description:  # separator only when earlier text exists
+        description += "\n\n---\n\n"
+    description += "Model Validation Status: {0}\nValidation Details: {1}".format(
+        status, run_link
+    )
+    client.update_model_version(
+        name=model_name, version=model_version, description=description
+    )
+
+{{ if (eq .input_include_feature_store `yes`) }}
+
+from datetime import timedelta, timezone
+import math
+import pyspark.sql.functions as F
+from pyspark.sql.types import IntegerType
+
+
+def rounded_unix_timestamp(dt, num_minutes=15):
+    """
+    Rounds dt up to the next multiple of num_minutes past the hour (no-op when already on one), then returns the unix timestamp with dt read as UTC.
+    """
+    nsecs = dt.minute * 60 + dt.second + dt.microsecond * 1e-6  # seconds elapsed since the top of the hour
+    delta = math.ceil(nsecs / (60 * num_minutes)) * (60 * num_minutes) - nsecs  # seconds up to the next boundary
+    return int((dt + timedelta(seconds=delta)).replace(tzinfo=timezone.utc).timestamp())
+
+
+rounded_unix_timestamp_udf = F.udf(rounded_unix_timestamp, IntegerType())
+
+
+def rounded_taxi_data(taxi_data_df):
+    # Round the taxi data timestamp to 15 and 30 minute intervals so we can join with the pickup and dropoff features
+    # respectively.
+    taxi_data_df = (
+        taxi_data_df.withColumn(
+            "rounded_pickup_datetime",
+            F.to_timestamp(
+                rounded_unix_timestamp_udf(
+                    taxi_data_df["tpep_pickup_datetime"], F.lit(15)
+                )
+            ),
+        )
+        .withColumn(
+            "rounded_dropoff_datetime",
+            F.to_timestamp(
+                rounded_unix_timestamp_udf(
+                    taxi_data_df["tpep_dropoff_datetime"], F.lit(30)
+                )
+            ),
+        )
+        .drop("tpep_pickup_datetime")
+        .drop("tpep_dropoff_datetime")
+    )
+    taxi_data_df.createOrReplaceTempView("taxi_data")
+    return taxi_data_df
+
+
+data = rounded_taxi_data(data)
+
+
+{{ end }}
+
+# COMMAND ----------
+
+{{ if (eq .input_include_feature_store `yes`) }}
+# Temporary fix as FS model can't predict as a pyfunc model
+# MLflow evaluate can take a lambda function instead of a model uri for a model
+# but it does not work for the baseline model as it requires a model_uri (baseline comparison is set to false)
+
+from databricks.feature_store import FeatureStoreClient
+
+def get_fs_model(df):  # scores df with the feature-store model and returns the "prediction" column as pandas
+    fs_client = FeatureStoreClient()
+    return (
+        fs_client.score_batch(model_uri, spark.createDataFrame(df))
+        .select("prediction")
+        .toPandas()
+    )
+{{ end }}
+
+training_run = get_training_run(model_name, model_version)
+
+# run evaluate
+with mlflow.start_run(
+    run_name=generate_run_name(training_run),
+    description=generate_description(training_run),
+) as run, tempfile.TemporaryDirectory() as tmp_dir:
+    validation_thresholds_file = os.path.join(tmp_dir, "validation_thresholds.txt")
+    with open(validation_thresholds_file, "w") as f:
+        if validation_thresholds:
+            for metric_name in validation_thresholds:
+                f.write(
+                    "{0:30} {1}\n".format(
+                        metric_name, str(validation_thresholds[metric_name])
+                    )
+                )
+    mlflow.log_artifact(validation_thresholds_file)  # logged even when no thresholds were written
+
+    try:
+        eval_result = mlflow.evaluate(
+            {{ if (eq .input_include_feature_store `yes`) }}
+            model=get_fs_model,
+            {{ else }}
+            model=model_uri,
+            {{ end }}
+            data=data,
+            targets=targets,
+            model_type=model_type,
+            evaluators=evaluators,
+            validation_thresholds=validation_thresholds,
+            custom_metrics=custom_metrics,
+            baseline_model=None
+            if not enable_baseline_comparison
+            else baseline_model_uri,
+            evaluator_config=evaluator_config,
+        )
+        metrics_file = os.path.join(tmp_dir, "metrics.txt")
+        with open(metrics_file, "w") as f:
+            f.write(
+                "{0:30} {1:30} {2}\n".format("metric_name", "candidate", "baseline")
+            )
+            for metric in eval_result.metrics:
+                candidate_metric_value = str(eval_result.metrics[metric])
+                baseline_metric_value = "N/A"  # kept when the baseline has no value for this metric
+                if metric in eval_result.baseline_model_metrics:
+                    mlflow.log_metric(
+                        "baseline_" + metric, eval_result.baseline_model_metrics[metric]
+                    )
+                    baseline_metric_value = str(
+                        eval_result.baseline_model_metrics[metric]
+                    )
+                f.write(
+                    "{0:30} {1:30} {2}\n".format(
+                        metric, candidate_metric_value, baseline_metric_value
+                    )
+                )
+        mlflow.log_artifact(metrics_file)
+        log_to_model_description(run, True)
+        {{ if (eq .input_include_models_in_unity_catalog "yes") }}
+        # Assign "challenger" alias to indicate model version has passed validation checks
+        print("Validation checks passed. Assigning 'challenger' alias to model version.")
+        client.set_registered_model_alias(model_name, "challenger", model_version)
+        {{ end }}
+    except Exception as err:
+        log_to_model_description(run, False)
+        error_file = os.path.join(tmp_dir, "error.txt")
+        with open(error_file, "w") as f:
+            f.write("Validation failed : " + str(err) + "\n")
+            f.write(traceback.format_exc())
+        mlflow.log_artifact(error_file)
+        if not dry_run:
+            raise  # bare raise re-raises the active exception with its traceback intact
+        else:
+            print(
+                "Model validation failed in DRY_RUN. It will not block model deployment."
+            )
diff --git a/acceptance/bundle/deploy/mlops-stacks/test.toml b/acceptance/bundle/deploy/mlops-stacks/test.toml
index 98dbfa6435c..1987ccf6cf0 100644
--- a/acceptance/bundle/deploy/mlops-stacks/test.toml
+++ b/acceptance/bundle/deploy/mlops-stacks/test.toml
@@ -1,5 +1,5 @@
 Cloud=true
-Local=false
+Local=true
 
 Badness = "the newly initialized bundle from the 'mlops-stacks' template contains two validation warnings in the configuration"
 
@@ -10,3 +10,7 @@ Ignore = [
 [[Repls]]
 Old = "aws|azure|gcp"
 New = "[CLOUD_ENV_BASE]"
+
+[[Repls]]
+Old = "bundle init .*/mlops-stacks/template"
+New = "bundle init mlops-stacks"