diff --git a/.github/workflows/code-coverage.yml b/.github/workflows/code-coverage.yml index 9f578ec9f..c5da8ab2d 100644 --- a/.github/workflows/code-coverage.yml +++ b/.github/workflows/code-coverage.yml @@ -11,6 +11,19 @@ on: pull_request: workflow_dispatch: +# Serialise E2E runs per ref so a force-push (or a fast follow-up commit) +# on a PR cancels the previous run instead of racing it against shared +# warehouse state (Delta tables, UC Volume files, etc.). +# +# Pushes to main are NOT cancelled — each merge commit needs its own clean +# CI signal so a regression on commit N doesn't get hidden by commit N+1 +# arriving seconds later. (Concurrent main runs can still collide on shared +# state, but that's the cost of preserving per-commit signal; the +# uuid-suffix conventions in the e2e tests are what keep them isolated.) +concurrency: + group: e2e-${{ github.workflow }}-${{ github.ref }} + cancel-in-progress: ${{ github.event_name == 'pull_request' }} + jobs: test-with-coverage: runs-on: diff --git a/tests/e2e/common/uc_volume_tests.py b/tests/e2e/common/uc_volume_tests.py index 5b4086f91..cf8797c63 100644 --- a/tests/e2e/common/uc_volume_tests.py +++ b/tests/e2e/common/uc_volume_tests.py @@ -1,5 +1,6 @@ import os import tempfile +from uuid import uuid4 import pytest import databricks.sql as sql @@ -40,12 +41,16 @@ def test_uc_volume_life_cycle(self, catalog, schema): with open(fh, "wb") as fp: fp.write(original_text) + # Unique per-run path so concurrent CI jobs sharing the same volume + # don't step on each other's PUT/GET/REMOVE. + volume_path = f"/Volumes/{catalog}/{schema}/e2etests/life_cycle_{uuid4().hex[:8]}.csv" + with self.connection( extra_params={"staging_allowed_local_path": temp_path} ) as conn: cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO '/Volumes/{catalog}/{schema}/e2etests/file1.csv' OVERWRITE" + query = f"PUT '{temp_path}' INTO '{volume_path}' OVERWRITE" cursor.execute(query) # GET should succeed @@ -56,7 +61,7 @@ def test_uc_volume_life_cycle(self, catalog, schema): extra_params={"staging_allowed_local_path": new_temp_path} ) as conn: cursor = conn.cursor() - query = f"GET '/Volumes/{catalog}/{schema}/e2etests/file1.csv' TO '{new_temp_path}'" + query = f"GET '{volume_path}' TO '{new_temp_path}'" cursor.execute(query) with open(new_fh, "rb") as fp: @@ -66,7 +71,7 @@ def test_uc_volume_life_cycle(self, catalog, schema): # REMOVE should succeed - remove_query = f"REMOVE '/Volumes/{catalog}/{schema}/e2etests/file1.csv'" + remove_query = f"REMOVE '{volume_path}'" # Use minimal retry settings to fail fast extra_params = { @@ -84,7 +89,7 @@ def test_uc_volume_life_cycle(self, catalog, schema): Error, match="Staging operation over HTTP was unsuccessful: 404" ): cursor = conn.cursor() - query = f"GET '/Volumes/{catalog}/{schema}/e2etests/file1.csv' TO '{new_temp_path}'" + query = f"GET '{volume_path}' TO '{new_temp_path}'" cursor.execute(query) os.remove(temp_path) @@ -151,19 +156,22 @@ def test_uc_volume_put_fails_if_file_exists_and_overwrite_not_set( with open(fh, "wb") as fp: fp.write(original_text) + # Unique per-run path so a concurrent CI job's REMOVE doesn't delete + # our file between the two PUTs and silently turn the expected + # FILE_IN_STAGING_PATH_ALREADY_EXISTS into a successful PUT. + volume_path = f"/Volumes/{catalog}/{schema}/e2etests/put_conflict_{uuid4().hex[:8]}.csv" + def perform_put(): with self.connection( extra_params={"staging_allowed_local_path": temp_path} ) as conn: cursor = conn.cursor() - query = f"PUT '{temp_path}' INTO '/Volumes/{catalog}/{schema}/e2etests/file1.csv'" + query = f"PUT '{temp_path}' INTO '{volume_path}'" cursor.execute(query) def perform_remove(): try: - remove_query = ( - f"REMOVE '/Volumes/{catalog}/{schema}/e2etests/file1.csv'" - ) + remove_query = f"REMOVE '{volume_path}'" with self.connection( extra_params={"staging_allowed_local_path": "/"} diff --git a/tests/e2e/test_transactions.py b/tests/e2e/test_transactions.py index e91afc0dd..fd4fd73ab 100644 --- a/tests/e2e/test_transactions.py +++ b/tests/e2e/test_transactions.py @@ -35,10 +35,17 @@ def _unique_table_name(request): - """Derive a unique Delta table name from the test node id.""" + """Derive a unique Delta table name from the test node id. + + The uuid suffix keeps tables unique across concurrent CI jobs that + share the same warehouse/catalog — without it, two runs racing on + the same test name collide on CREATE/DROP. + """ node_id = request.node.name sanitized = re.sub(r"[^a-z0-9_]", "_", node_id.lower()) - return f"mst_pysql_{sanitized}"[:80] + suffix = uuid.uuid4().hex[:8] + # Reserve room for the 9-char "_{suffix}" tail so total stays <= 80. + return f"mst_pysql_{sanitized}"[:71] + f"_{suffix}" def _unique_table_name_raw(suffix):