Merged

Changes from all commits (32 commits)
4e048d3
Update autogenerated version to 26.3.8.4 and contributors
robot-clickhouse Apr 12, 2026
e6d44b0
Backport #100573 to 26.3: Fix Block structure mismatch exception in M…
robot-clickhouse Apr 12, 2026
32ebe2e
Merge pull request #102504 from ClickHouse/backport/26.3/100573
clickhouse-gh[bot] Apr 12, 2026
891a5ef
Backport #101645 to 26.3: Fix exact subcolumn match priority over pre…
robot-clickhouse Apr 13, 2026
d38923a
Backport #99279 to 26.3: Check session settings in create queries whe…
robot-clickhouse Apr 13, 2026
869fd54
Merge pull request #102522 from ClickHouse/backport/26.3/99279
clickhouse-gh[bot] Apr 13, 2026
dffc17a
Backport #101385 to 26.3: Fix data race on storage_id in IStorage::ge…
robot-clickhouse Apr 13, 2026
7eb2a32
Merge pull request #102518 from ClickHouse/backport/26.3/101645
Avogar Apr 13, 2026
5f0dc36
Backport #102397 to 26.3: Fix Alias tables without a target table wit…
robot-clickhouse Apr 13, 2026
a468bc0
Merge pull request #102539 from ClickHouse/backport/26.3/101385
clickhouse-gh[bot] Apr 13, 2026
5263605
Backport #102408 to 26.3: Revert "Revert "Fix polaris catalog with az…
robot-clickhouse Apr 13, 2026
f50fc1f
Merge pull request #102565 from ClickHouse/backport/26.3/102397
clickhouse-gh[bot] Apr 13, 2026
20c4a3e
Merge pull request #102582 from ClickHouse/backport/26.3/102408
clickhouse-gh[bot] Apr 13, 2026
4f92428
Backport #100837 to 26.3: Try fixing use-after-free in RemoteQueryExe…
robot-clickhouse Apr 13, 2026
0d82c19
Merge pull request #102622 from ClickHouse/backport/26.3/100837
clickhouse-gh[bot] Apr 13, 2026
4699ac5
Rebase CICD on v26.3.9.8-lts
strtgbb Apr 14, 2026
3d462a8
Merge branch 'antalya-26.3' into rebase-cicd-v26.3.9.8-lts
strtgbb Apr 14, 2026
78df385
disable flaky create_minio_log_tables in stateless setup
strtgbb Apr 14, 2026
27608f0
increase integration timeout
strtgbb Apr 14, 2026
b638ec4
always set tests that passed on retry as OK
strtgbb Apr 15, 2026
71a541c
try to fix error spam in report by excluding OK tests
strtgbb Apr 15, 2026
b8fe035
increase timeout for integration reruns
strtgbb Apr 15, 2026
8d4f2d3
make grype commit status respect xfails
strtgbb Apr 9, 2026
98b7494
increase integ timeouts and split integ azure suite
strtgbb Apr 17, 2026
65ec8ae
increase batches for arm_asan
strtgbb Apr 20, 2026
18f3ba1
fix non-azure stress jobs trying to use azure
strtgbb Apr 20, 2026
2faeafe
increase integration timeout
strtgbb Apr 20, 2026
15130c9
patch test_backward_compatibility/test_convert_ordinary.py
strtgbb Apr 20, 2026
f44afd3
increase batches for stateless asan distributed
strtgbb Apr 20, 2026
b05e477
try to fix 03706_statistics_preserve_checksums_on_mutations & 00084_e…
strtgbb Apr 20, 2026
ba1606a
increase batches for stateless wasmedge
strtgbb Apr 20, 2026
302a4ab
ignore 4 new sqllogic fails
strtgbb Apr 20, 2026
@@ -323,7 +323,7 @@ def get_checks_errors(client: Client, commit_sha: str, branch_name: str):
query = f"""{_checks_latest_test_status_cte(commit_sha, branch_name)}
SELECT job_status, job_name, status AS test_status, test_name, results_link
FROM latest_test_status
-WHERE job_status = 'error'
+WHERE job_status = 'error' AND test_status NOT IN ('OK', 'SKIPPED')
ORDER BY job_name, test_name
"""
return query_dataframe_with_retry(client, query)
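Note: with the extra predicate, a job-level 'error' no longer surfaces rows for tests that ultimately passed or were skipped. A minimal Python sketch of the same filtering rule, using hypothetical row dicts in place of the real query result:

# Hypothetical rows standing in for the CI results table; not the real schema.
rows = [
    {"job_status": "error", "test_name": "t1", "test_status": "OK"},
    {"job_status": "error", "test_name": "t2", "test_status": "FAIL"},
    {"job_status": "error", "test_name": "t3", "test_status": "SKIPPED"},
]

# Same rule as the WHERE clause: keep only genuine failures from errored jobs.
errors = [
    r for r in rows
    if r["job_status"] == "error" and r["test_status"] not in ("OK", "SKIPPED")
]
assert [r["test_name"] for r in errors] == ["t2"]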
3 changes: 2 additions & 1 deletion .github/workflows/grype_scan.yml
@@ -76,6 +76,7 @@ jobs:
./.github/grype/run_grype_scan.sh $DOCKER_IMAGE

- name: Parse grype results
+id: parse_grype
run: |
python3 -u ./.github/grype/parse_vulnerabilities_grype.py -o nice --no-colors --log raw.log --test-to-end

@@ -138,7 +139,7 @@ jobs:
owner: context.repo.owner,
repo: context.repo.repo,
sha: '${{ github.event_name == 'pull_request' && github.event.pull_request.head.sha || github.sha }}',
-state: hasError ? 'error' : hasVulnerabilities ? 'failure' : 'success',
+state: hasError ? 'error' : '${{ steps.parse_grype.outcome == 'success' && 'success' || 'failure' }}',
target_url: '${{ steps.upload_results.outputs.https_report_path }}',
description: hasError ? 'An error occurred' : `Grype Scan Completed with ${totalHighCritical} high/critical vulnerabilities`,
context: 'Grype Scan ${{ steps.set_version.outputs.docker_image || inputs.docker_image }}'
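Note: the commit state now follows the outcome of the parse step (which applies xfails) instead of the raw vulnerability count. A Python stand-in for the Actions expression, with hypothetical inputs:

# Python stand-in for the workflow's status expression (hypothetical inputs):
# parse_grype's outcome already reflects xfailed vulnerabilities, so the
# commit state keys off it rather than the raw high/critical count.
def grype_commit_state(has_error: bool, parse_outcome: str) -> str:
    if has_error:
        return "error"
    return "success" if parse_outcome == "success" else "failure"

assert grype_commit_state(False, "success") == "success"  # xfailed vulns stay green
assert grype_commit_state(False, "failure") == "failure"
assert grype_commit_state(True, "success") == "error"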
384 changes: 353 additions & 31 deletions .github/workflows/master.yml

Large diffs are not rendered by default.

524 changes: 423 additions & 101 deletions .github/workflows/pull_request.yml

Large diffs are not rendered by default.

18 changes: 11 additions & 7 deletions ci/defs/job_configs.py
@@ -556,7 +556,7 @@ class JobConfigs:
runs_on=RunnerLabels.AMD_MEDIUM_CPU,
requires=[ArtifactNames.CH_AMD_ASAN],
)
-for total_batches in (2,)
+for total_batches in (4,)
for batch in range(1, total_batches + 1)
],
Job.ParamSet(
@@ -634,7 +634,7 @@ class JobConfigs:
runs_on=RunnerLabels.FUNC_TESTER_AMD,
requires=[ArtifactNames.CH_AMD_MSAN],
)
-for total_batches in (2,)
+for total_batches in (4,)
for batch in range(1, total_batches + 1)
],
*[
@@ -709,11 +709,15 @@ class JobConfigs:
functional_tests_jobs_azure = common_ft_job_config.set_allow_merge_on_failure(
True
).parametrize(
-Job.ParamSet(
-parameter="arm_asan, azure, parallel",
-runs_on=RunnerLabels.FUNC_TESTER_ARM,
-requires=[ArtifactNames.CH_ARM_ASAN],
-),
+*[
+Job.ParamSet(
+parameter=f"arm_asan, azure, parallel, {batch}/{total_batches}",
+runs_on=RunnerLabels.FUNC_TESTER_ARM,
+requires=[ArtifactNames.CH_ARM_ASAN],
+)
+for total_batches in (4,)
+for batch in range(1, total_batches + 1)
+],
Job.ParamSet(
parameter="arm_asan, azure, sequential",
runs_on=RunnerLabels.FUNC_TESTER_ARM,
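Note: the comprehension expands the single azure-parallel job into one ParamSet per batch. A sketch of the parameter strings it generates:

# Expansion of the comprehension above: one parameter string per batch.
params = [
    f"arm_asan, azure, parallel, {batch}/{total_batches}"
    for total_batches in (4,)
    for batch in range(1, total_batches + 1)
]
assert params == [
    "arm_asan, azure, parallel, 1/4",
    "arm_asan, azure, parallel, 2/4",
    "arm_asan, azure, parallel, 3/4",
    "arm_asan, azure, parallel, 4/4",
]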
27 changes: 13 additions & 14 deletions ci/jobs/functional_tests.py
@@ -478,11 +478,11 @@ def start():
# "WARNING: Failed to start log export"
# )
# print("Failed to start log export")
-if not CH.create_minio_log_tables():
-info.add_workflow_report_message(
-"WARNING: Failed to create minio log tables"
-)
-print("Failed to create minio log tables")
+# if not CH.create_minio_log_tables():
+# info.add_workflow_report_message(
+# "WARNING: Failed to create minio log tables"
+# )
+# print("Failed to create minio log tables")

if has_stateful_tests:
res = (
@@ -685,15 +685,14 @@ def start():
if success_after_rerun or failed_after_rerun:
for test_case in test_result.results:
if test_case.name in success_after_rerun:
-if is_llvm_coverage:
-print(
-f"Test {test_case.name} has succeeded after rerun. Mark it as OK"
-)
-test_case.remove_label(Result.Status.FAILED)
-test_case.remove_label(Result.StatusExtended.FAIL)
-test_case.set_status(Result.StatusExtended.OK)
-else:
-test_case.set_label(Result.Label.OK_ON_RETRY)
+# NOTE (strtgbb): Tweaked to always mark a test that is ok on retry as ok. We want to ignore flaky tests.
+print(
+f"Test {test_case.name} has succeeded after rerun. Mark it as OK"
+)
+test_case.remove_label(Result.Status.FAILED)
+test_case.remove_label(Result.StatusExtended.FAIL)
+test_case.set_status(Result.StatusExtended.OK)
+test_case.set_label(Result.Label.OK_ON_RETRY)
elif test_case.name in failed_after_rerun:
test_case.set_label(Result.Label.FAILED_ON_RETRY)
results.append(retry_result)
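Note: previously only llvm-coverage runs promoted a retried pass to OK; now every test that passes on rerun is promoted and tagged OK_ON_RETRY. A simplified Python stand-in for the Result API (not the real classes):

# Simplified stand-in for the Result API used above: a test that fails,
# then passes on rerun, is promoted to OK and tagged OK_ON_RETRY
# regardless of build flavor.
def mark_ok_on_retry(test: dict) -> None:
    test["labels"].discard("FAILED")
    test["labels"].discard("FAIL")
    test["status"] = "OK"
    test["labels"].add("OK_ON_RETRY")

test = {"status": "FAIL", "labels": {"FAILED"}}
mark_ok_on_retry(test)
assert test == {"status": "OK", "labels": {"OK_ON_RETRY"}}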
9 changes: 5 additions & 4 deletions ci/jobs/integration_test_job.py
@@ -752,8 +752,8 @@ def main():
failed_tests_files = []

has_error = False
-session_timeout_parallel = 3600 * 2
-session_timeout_sequential = 3600
+session_timeout_parallel = 3600 * 3
+session_timeout_sequential = 3600 * 1.5

if is_llvm_coverage:
session_timeout_parallel = 3600 * 3
@@ -877,17 +877,18 @@ def main():
is_flaky_check or is_bugfix_validation or is_targeted_check or info.is_local_run
):
test_result_retries = run_pytest_and_collect_results(
-command=f"{' '.join(failed_test_cases)} --report-log-exclude-logs-on-passed-tests --tb=short -n 1 --dist=loadfile --session-timeout=3600",
+command=f"{' '.join(failed_test_cases)} --report-log-exclude-logs-on-passed-tests --tb=short -n 1 --dist=loadfile --session-timeout=7000",
env=test_env,
report_name="retries",
-timeout=3600 + 600,
+timeout=7000 + 600,
)
successful_retries = [t.name for t in test_result_retries.results if t.is_ok()]
failed_retries = [t.name for t in test_result_retries.results if t.is_failure()]
if successful_retries or failed_retries:
for test_case in test_results:
if test_case.name in successful_retries:
test_case.set_label(Result.Label.OK_ON_RETRY)
+test_case.set_status(Result.StatusExtended.OK)
elif test_case.name in failed_retries:
test_case.set_label(Result.Label.FAILED_ON_RETRY)

10 changes: 5 additions & 5 deletions cmake/autogenerated_versions.txt
@@ -2,13 +2,13 @@

# NOTE: VERSION_REVISION has nothing common with DBMS_TCP_PROTOCOL_VERSION,
# only DBMS_TCP_PROTOCOL_VERSION should be incremented on protocol changes.
-SET(VERSION_REVISION 54515)
+SET(VERSION_REVISION 54516)
SET(VERSION_MAJOR 26)
SET(VERSION_MINOR 3)
-SET(VERSION_PATCH 8)
-SET(VERSION_GITHASH 177c6aa7da1703d851ce0d997dd76f035fa6940d)
-SET(VERSION_DESCRIBE v26.3.8.20001.altinityantalya)
-SET(VERSION_STRING 26.3.8.20001.altinityantalya)
+SET(VERSION_PATCH 9)
+SET(VERSION_GITHASH f3c6e5a4d27c3997b2a91174752e44acedc51f74)
+SET(VERSION_DESCRIBE v26.3.9.20001.altinityantalya)
+SET(VERSION_STRING 26.3.9.20001.altinityantalya)
# end of autochange

SET(VERSION_TWEAK 20001)
15 changes: 12 additions & 3 deletions src/DataTypes/IDataType.cpp
@@ -158,6 +158,12 @@ std::unique_ptr<IDataType::SubstreamData> IDataType::getSubcolumnData(
bool throw_if_null)
{
std::unique_ptr<IDataType::SubstreamData> res;
+/// Track whether res was set by an exact name match, so that exact matches
+/// always take priority over prefix (dynamic subcolumn) matches.
+/// This matters when e.g. JSON has typed paths "a" (Array(JSON)) and "a.b" (Int64):
+/// without this, the prefix match on "a" would fire first (sorted order) and
+/// the exact match on "a.b" would be skipped because res is already set.
+bool res_from_exact_match = false;

ISerialization::StreamCallback callback_with_data = [&](const auto & subpath)
{
@@ -168,15 +174,18 @@ std::unique_ptr<IDataType::SubstreamData> IDataType::getSubcolumnData(
{
auto name = ISerialization::getSubcolumnNameForStream(subpath, prefix_len, false, initial_array_level);
/// Create data from path only if it's requested subcolumn.
-/// Use the first match to be consistent with ColumnsDescription::addSubcolumns
+/// Use the first exact match to be consistent with ColumnsDescription::addSubcolumns
/// which also keeps the first subcolumn when there are name collisions
/// (e.g. "null" can match both Nullable's null-map and a Tuple element named "null").
-if (name == subcolumn_name && !res)
+/// Exact matches always take priority over prefix matches regardless of iteration order.
+if (name == subcolumn_name && !res_from_exact_match)
{
res = std::make_unique<SubstreamData>(ISerialization::createFromPath(subpath, prefix_len));
+res_from_exact_match = true;
}
/// Check if this subcolumn is a prefix of requested subcolumn and it can create dynamic subcolumns.
-else if (subcolumn_name.starts_with(name + ".") && subpath[i].data.type && subpath[i].data.type->hasDynamicSubcolumnsData())
+/// Only use prefix matches when no exact match has been found.
+else if (!res_from_exact_match && subcolumn_name.starts_with(name + ".") && subpath[i].data.type && subpath[i].data.type->hasDynamicSubcolumnsData())
{
auto dynamic_subcolumn_name = subcolumn_name.substr(name.size() + 1);
auto dynamic_subcolumn_data = subpath[i].data.type->getDynamicSubcolumnData(
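Note: a Python sketch of the resolution-order bug this fixes (names illustrative). Streams are visited in sorted order, so "a" is seen before "a.b"; without the exact-match flag, the prefix match on "a" wins and the exact match on "a.b" is never taken:

# Toy model of the subcolumn lookup loop above; streams is a sorted list of
# (stream_name, has_dynamic_subcolumns) pairs standing in for the real subpaths.
def resolve(streams, requested):
    res, res_exact = None, False
    for name, has_dynamic in streams:  # sorted iteration order
        if name == requested and not res_exact:
            res, res_exact = ("exact", name), True  # exact match always wins
        elif not res_exact and requested.startswith(name + ".") and has_dynamic:
            res = ("prefix", name)  # dynamic-subcolumn prefix match
    return res

streams = [("a", True), ("a.b", False)]  # "a" sorts before "a.b"
assert resolve(streams, "a.b") == ("exact", "a.b")  # old code returned ("prefix", "a")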
5 changes: 5 additions & 0 deletions src/Databases/DataLake/DatabaseDataLake.cpp
@@ -79,6 +79,7 @@ namespace DatabaseDataLakeSetting
extern const DatabaseDataLakeSettingsString google_adc_refresh_token;
extern const DatabaseDataLakeSettingsString google_adc_quota_project_id;
extern const DatabaseDataLakeSettingsString google_adc_credentials_file;
+extern const DatabaseDataLakeSettingsBool polaris_style_paths;
}

namespace Setting
@@ -497,6 +498,8 @@ StoragePtr DatabaseDataLake::tryGetTableImpl(const String & name, ContextPtr con
{
auto catalog = getCatalog();
auto table_metadata = DataLake::TableMetadata().withSchema().withLocation().withDataLakeSpecificProperties();
+if (settings[DatabaseDataLakeSetting::polaris_style_paths])
+table_metadata.withPolarisStyleAbfssPaths();

/// This is added to test that lightweight queries like 'SHOW TABLES' dont end up fetching the table
fiu_do_on(FailPoints::lightweight_show_tables,
@@ -859,6 +862,8 @@ ASTPtr DatabaseDataLake::getCreateTableQueryImpl(
{
auto catalog = getCatalog();
auto table_metadata = DataLake::TableMetadata().withLocation().withSchema();
+if (settings[DatabaseDataLakeSetting::polaris_style_paths])
+table_metadata.withPolarisStyleAbfssPaths();

const auto [namespace_name, table_name] = DataLake::parseTableName(name);

1 change: 1 addition & 0 deletions src/Databases/DataLake/DatabaseDataLakeSettings.cpp
@@ -44,6 +44,7 @@ namespace ErrorCodes
DECLARE(String, google_adc_credentials_file, "", "Deprecated setting, will throw an exception if used", 0) \
DECLARE(String, dlf_access_key_id, "", "Access id of DLF token for Paimon REST Catalog", 0) \
DECLARE(String, dlf_access_key_secret, "", "Access secret of DLF token for Paimon REST Catalog", 0) \
+DECLARE(Bool, polaris_style_paths, true, "Enable Polaris/ADLS Gen2 path convention: the container name is prepended to the path in ABFSS locations (e.g. abfss://c@account/c/actual/path). When enabled, the redundant container prefix is stripped when building Azure HTTPS URLs. Disable if a real directory inside the container has the same name as the container itself.", 0) \

#define LIST_OF_DATABASE_ICEBERG_SETTINGS(M, ALIAS) \
DATABASE_ICEBERG_RELATED_SETTINGS(M, ALIAS) \
74 changes: 68 additions & 6 deletions src/Databases/DataLake/ICatalog.cpp
@@ -118,6 +118,16 @@ void TableMetadata::setLocation(const std::string & location_)
/// Azure ABFSS format: extract container (before @) and account (after @)
bucket = bucket_part.substr(0, at_pos);
azure_account_with_suffix = bucket_part.substr(at_pos + 1);

+/// Some catalogs (e.g. Apache Polaris) follow the ADLS Gen2 filesystem convention
+/// of including the container name as the first segment of the path in abfss:// locations,
+/// e.g. abfss://container@account.dfs.core.windows.net/container/actual/path.
+/// We record this as a flag so that `constructLocation` and `getMetadataLocation` can
+/// strip the redundant prefix when needed, while `path` itself is left intact so that
+/// `getLocation` remains a round-trip of `setLocation`.
+if (polaris_style_abfss_paths && path.starts_with(bucket + "/"))
+abfss_has_container_path_prefix = true;

LOG_TEST(getLogger("TableMetadata"),
"Parsed Azure location - container: {}, account: {}, path: {}",
bucket, azure_account_with_suffix, path);
@@ -166,10 +176,15 @@ std::string TableMetadata::constructLocation(const std::string & endpoint_) cons
if (!azure_account_with_suffix.empty())
{
/// Azure storage - endpoint should be https://<account>.dfs.core.windows.net
-/// Construct the full URL with container and path
+/// Construct the full URL with container and path.
+/// When the path carries a Polaris-style redundant container prefix (e.g. "c/actual/path"
+/// for container "c"), strip it before prepending the container, so we don't double it.
+std::string_view effective_path = path;
+if (abfss_has_container_path_prefix && effective_path.starts_with(bucket + "/"))
+effective_path = effective_path.substr(bucket.size() + 1);
if (location.ends_with(bucket))
-return std::filesystem::path(location) / path / "";
-return std::filesystem::path(location) / bucket / path / "";
+return std::filesystem::path(location) / effective_path / "";
+return std::filesystem::path(location) / bucket / effective_path / "";
}

if (location.ends_with(bucket))
@@ -258,12 +273,59 @@ std::string TableMetadata::getMetadataLocation(const std::string & iceberg_metad
metadata_location = metadata_location.substr(storage_type_str.size());
if (data_location.starts_with(storage_type_str))
data_location = data_location.substr(storage_type_str.size());
-else if (!endpoint.empty() && data_location.starts_with(endpoint))
-data_location = data_location.substr(endpoint.size());
+else if (!endpoint.empty())
+{
+std::string normalized_endpoint = endpoint;
+if (normalized_endpoint.ends_with('/'))
+normalized_endpoint.pop_back();
+if (data_location.starts_with(normalized_endpoint))
+{
+data_location = data_location.substr(normalized_endpoint.size());
+if (azure_account_with_suffix.empty() && !data_location.empty() && data_location.front() == '/')
+data_location = data_location.substr(1);
+}
+}

+/// For Azure ABFSS locations we need to reconcile two different formats:
+/// - metadata_location (from catalog): "container@account.host/path/..."
+/// - data_location (with endpoint set): "/container/path/" (HTTPS path after endpoint stripped)
+/// When no endpoint is set both sides are in ABFSS authority form and compare directly.
+if (!azure_account_with_suffix.empty() && !bucket.empty())
+{
+/// The host part after stripping the ABFSS protocol is: bucket@azure_account_with_suffix/
+std::string azure_host_prefix = bucket + "@" + azure_account_with_suffix + "/";
+
+/// For Polaris-style paths: the container name is repeated as the first path segment
+/// (e.g. abfss://c@account/c/actual/path). Strip that redundant prefix from both sides
+/// before the comparison below so we identify the correct relative path.
+/// This runs for both with-endpoint and without-endpoint cases.
+if (abfss_has_container_path_prefix)
+{
+auto strip_container = [&](std::string & location_str)
+{
+if (location_str.starts_with(azure_host_prefix))
+{
+std::string_view after_host = std::string_view(location_str).substr(azure_host_prefix.size());
+if (after_host.starts_with(bucket + "/"))
+{
+location_str = std::string(azure_host_prefix) + std::string(after_host.substr(bucket.size() + 1));
+}
+}
+};
+strip_container(metadata_location);
+strip_container(data_location);
+}
+
+/// With endpoint: data_location is now in HTTPS path form ("/container/path/").
+/// Convert metadata_location from ABFSS authority form ("container@account.host/path")
+/// to the matching HTTPS path form ("/container/path") so the prefix comparison works.
+if (!endpoint.empty() && metadata_location.starts_with(azure_host_prefix))
+metadata_location = "/" + bucket + "/" + metadata_location.substr(azure_host_prefix.size());
+}

if (metadata_location.starts_with(data_location))
{
-size_t remove_slash = metadata_location[data_location.size()] == '/' ? 1 : 0;
+size_t remove_slash = (metadata_location.size() > data_location.size() && metadata_location[data_location.size()] == '/') ? 1 : 0;
metadata_location = metadata_location.substr(data_location.size() + remove_slash);
}
}
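Note: a Python sketch of constructLocation's Polaris handling with illustrative values. The container name appears both in the ABFSS authority and as the first path segment, and must not be doubled when the HTTPS location is built:

# Toy model of the C++ logic above; endpoint/container/path values are
# illustrative, not taken from any real deployment.
def construct_location(endpoint, bucket, path, polaris_prefix):
    # Strip the redundant "container/" prefix carried by Polaris-style paths.
    if polaris_prefix and path.startswith(bucket + "/"):
        path = path[len(bucket) + 1:]
    if endpoint.endswith(bucket):
        return f"{endpoint}/{path}/"
    return f"{endpoint}/{bucket}/{path}/"

loc = construct_location(
    "https://account.dfs.core.windows.net",  # hypothetical endpoint
    "c", "c/actual/path", polaris_prefix=True)
# Without the strip, the container would be doubled: ".../c/c/actual/path/".
assert loc == "https://account.dfs.core.windows.net/c/actual/path/"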
14 changes: 14 additions & 0 deletions src/Databases/DataLake/ICatalog.h
@@ -33,6 +33,10 @@ class TableMetadata
TableMetadata & withSchema() { with_schema = true; return *this; }
TableMetadata & withStorageCredentials() { with_storage_credentials = true; return *this; }
TableMetadata & withDataLakeSpecificProperties() { with_datalake_specific_metadata = true; return *this; }
+/// Enable Polaris/ADLS Gen2 convention: when `setLocation` sees an ABFSS URL where the
+/// first path segment equals the container name, treat it as a redundant prefix and record
+/// it so that `constructLocation` and `getMetadataLocation` can strip it.
+TableMetadata & withPolarisStyleAbfssPaths() { polaris_style_abfss_paths = true; return *this; }

bool hasLocation() const;
bool hasSchema() const;
@@ -93,6 +97,16 @@ class TableMetadata
/// For Azure ABFSS URLs: stores the account with suffix (e.g., "account.dfs.core.windows.net")
/// This is extracted from URLs like: abfss://container@account.dfs.core.windows.net/path
std::string azure_account_with_suffix;
+/// True when `setLocation` detected that the ABFSS path starts with the container name
+/// as a redundant first segment — a convention used by some catalogs (e.g. Apache Polaris /
+/// ADLS Gen2 filesystem paths).
+/// Example: abfss://c@account.dfs.core.windows.net/c/actual/path — `c` appears in both
+/// the authority and the first path segment.
+/// When set, `constructLocation` and `getMetadataLocation` strip that prefix when building
+/// Azure HTTPS URLs or comparing metadata-file prefixes, but `path` itself is left intact so
+/// that `getLocation` remains a round-trip of `setLocation`.
+bool polaris_style_abfss_paths = false;
+bool abfss_has_container_path_prefix = false;
/// Endpoint is set and used in case we have non-AWS storage implementation, for example, Minio.
/// Also not all catalogs support non-AWS storages.
std::string endpoint;