From 61c0798cb039c90dc6919d28baddc5dedb847ea0 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Thu, 5 Mar 2026 16:00:25 +0900 Subject: [PATCH 01/10] Implement basic row cache --- .../org/apache/hadoop/hbase/HConstants.java | 6 + .../io/encoding/BufferedDataBlockEncoder.java | 8 +- .../MetricsRegionServerSource.java | 7 + .../MetricsRegionServerSourceImpl.java | 6 + .../MetricsRegionServerWrapper.java | 10 + .../hadoop/hbase/regionserver/HRegion.java | 85 ++- .../MetricsRegionServerWrapperImpl.java | 28 + .../hbase/regionserver/RSRpcServices.java | 21 +- .../hadoop/hbase/regionserver/RowCache.java | 236 ++++++-- .../hadoop/hbase/regionserver/RowCells.java | 3 +- .../regionserver/TinyLfuRowCacheStrategy.java | 113 ++++ .../MetricsRegionServerWrapperStub.java | 25 + .../regionserver/TestMetricsRegionServer.java | 5 + .../hbase/regionserver/TestRowCache.java | 547 ++++++++++++++++++ .../regionserver/TestRowCacheCanCacheRow.java | 266 +++++++++ .../TestRowCacheConfiguration.java | 81 +++ .../TestRowCacheEvictOnClose.java | 129 +++++ .../regionserver/TestRowCacheHRegion.java | 97 ++++ ...heWithBucketCacheAndDataBlockEncoding.java | 154 +++++ .../regionserver/TestRowCacheWithMock.java | 397 +++++++++++++ .../tool/TestRowCacheBulkLoadHFiles.java | 199 +++++++ 21 files changed, 2369 insertions(+), 54 deletions(-) create mode 100644 hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java create mode 100644 hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index 6a51172e9a73..f140783067af 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1029,6 +1029,12 @@ public enum OperationStatusCode { public static final String ROW_CACHE_ENABLED_KEY = "row.cache.enabled"; public static final boolean ROW_CACHE_ENABLED_DEFAULT = false; + /** + * Configuration key for evicting the row cache on region close + */ + public static final String ROW_CACHE_EVICT_ON_CLOSE_KEY = "row.cache.evictOnClose"; + public static final boolean ROW_CACHE_EVICT_ON_CLOSE_DEFAULT = false; + /** * Configuration key for the memory size of the block cache */ diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java index 5ec39fa5803d..54505dfce955 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java +++ 
b/hbase-common/src/main/java/org/apache/hadoop/hbase/io/encoding/BufferedDataBlockEncoder.java @@ -547,8 +547,8 @@ public void setTimestamp(byte[] ts) throws IOException { @Override public ExtendedCell deepClone() { - // This is not used in actual flow. Throwing UnsupportedOperationException - throw new UnsupportedOperationException(); + // To garbage collect the objects referenced by this cell, we need to deep clone it + return ExtendedCell.super.deepClone(); } } @@ -796,8 +796,8 @@ public void write(ByteBuffer buf, int offset) { @Override public ExtendedCell deepClone() { - // This is not used in actual flow. Throwing UnsupportedOperationException - throw new UnsupportedOperationException(); + // To cache the row, we need to deep clone it + return super.deepClone(); } } diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java index c88a77b51407..166484fe8991 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSource.java @@ -430,6 +430,13 @@ public interface MetricsRegionServerSource extends BaseSource, JvmPauseMonitorSo String L2_CACHE_HIT_RATIO_DESC = "L2 cache hit ratio."; String L2_CACHE_MISS_RATIO = "l2CacheMissRatio"; String L2_CACHE_MISS_RATIO_DESC = "L2 cache miss ratio."; + + String ROW_CACHE_HIT_COUNT = "rowCacheHitCount"; + String ROW_CACHE_MISS_COUNT = "rowCacheMissCount"; + String ROW_CACHE_EVICTED_ROW_COUNT = "rowCacheEvictedRowCount"; + String ROW_CACHE_SIZE = "rowCacheSize"; + String ROW_CACHE_COUNT = "rowCacheCount"; + String RS_START_TIME_NAME = "regionServerStartTime"; String ZOOKEEPER_QUORUM_NAME = "zookeeperQuorum"; String SERVER_NAME_NAME = "serverName"; diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java index b214c8f8f4e7..90ea2a1165c8 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerSourceImpl.java @@ -452,6 +452,12 @@ public void getMetrics(MetricsCollector metricsCollector, boolean all) { .addCounter(Interns.info(BLOCK_CACHE_DELETE_FAMILY_BLOOM_HIT_COUNT, ""), rsWrap.getDeleteFamilyBloomHitCount()) .addCounter(Interns.info(BLOCK_CACHE_TRAILER_HIT_COUNT, ""), rsWrap.getTrailerHitCount()) + .addCounter(Interns.info(ROW_CACHE_HIT_COUNT, ""), rsWrap.getRowCacheHitCount()) + .addCounter(Interns.info(ROW_CACHE_MISS_COUNT, ""), rsWrap.getRowCacheMissCount()) + .addCounter(Interns.info(ROW_CACHE_EVICTED_ROW_COUNT, ""), + rsWrap.getRowCacheEvictedRowCount()) + .addGauge(Interns.info(ROW_CACHE_SIZE, ""), rsWrap.getRowCacheSize()) + .addGauge(Interns.info(ROW_CACHE_COUNT, ""), rsWrap.getRowCacheCount()) .addCounter(Interns.info(UPDATES_BLOCKED_TIME, UPDATES_BLOCKED_DESC), rsWrap.getUpdatesBlockedTime()) .addCounter(Interns.info(FLUSHED_CELLS, FLUSHED_CELLS_DESC), rsWrap.getFlushedCellsCount()) diff --git a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java index 
5b957d9bf08f..68e43b276ee2 100644 --- a/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java +++ b/hbase-hadoop-compat/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapper.java @@ -635,6 +635,16 @@ public interface MetricsRegionServerWrapper { long getTrailerHitCount(); + long getRowCacheHitCount(); + + long getRowCacheMissCount(); + + long getRowCacheSize(); + + long getRowCacheCount(); + + long getRowCacheEvictedRowCount(); + long getTotalRowActionRequestCount(); long getByteBuffAllocatorHeapAllocationBytes(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 60bd4cee6b73..3a5c3f34313a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -18,6 +18,8 @@ package org.apache.hadoop.hbase.regionserver; import static org.apache.hadoop.hbase.HConstants.REPLICATION_SCOPE_LOCAL; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_EVICT_ON_CLOSE_DEFAULT; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_EVICT_ON_CLOSE_KEY; import static org.apache.hadoop.hbase.regionserver.HStoreFile.MAJOR_COMPACTION_KEY; import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.REGION_NAMES_KEY; import static org.apache.hadoop.hbase.trace.HBaseSemanticAttributes.ROW_LOCK_READ_LOCK_KEY; @@ -145,6 +147,7 @@ import org.apache.hadoop.hbase.io.hfile.bucket.BucketCache; import org.apache.hadoop.hbase.ipc.CoprocessorRpcUtils; import org.apache.hadoop.hbase.ipc.RpcCall; +import org.apache.hadoop.hbase.ipc.RpcCallContext; import org.apache.hadoop.hbase.ipc.RpcServer; import org.apache.hadoop.hbase.ipc.ServerCall; import org.apache.hadoop.hbase.mob.MobFileCache; @@ -946,7 +949,7 @@ public HRegion(final HRegionFileSystem fs, final WAL wal, final Configuration co this.isRowCacheEnabled = checkRowCacheConfig(); } - private boolean checkRowCacheConfig() { + boolean checkRowCacheConfig() { Boolean fromDescriptor = htableDescriptor.getRowCacheEnabled(); // The setting from TableDescriptor has higher priority than the global configuration return fromDescriptor != null ? fromDescriptor : conf.getBoolean(HConstants.ROW_CACHE_ENABLED_KEY, HConstants.ROW_CACHE_ENABLED_DEFAULT); } + // For testing only + void setRowCache(RowCache rowCache) { + this.rowCache = rowCache; + } + private void setHTableSpecificConf() { if (this.htableDescriptor == null) { return; } @@ -1963,6 +1971,8 @@ public Pair<byte[], Collection<HStoreFile>> call() throws IOException { } } + evictRowCache(); + status.setStatus("Writing region close event to WAL"); // Always write close marker to wal even for read only table. This is not a big problem as we // do not write any data into the region; it is just a meta edit in the WAL file. @@ -2003,6 +2013,22 @@ public Pair<byte[], Collection<HStoreFile>> call() throws IOException { } } + private void evictRowCache() { + boolean evictOnClose = getReadOnlyConfiguration().getBoolean(ROW_CACHE_EVICT_ON_CLOSE_KEY, + ROW_CACHE_EVICT_ON_CLOSE_DEFAULT); + + if (!evictOnClose) { + return; + } + + if (!(rsServices instanceof HRegionServer regionServer)) { + return; + } + + RowCache rowCache = regionServer.getRSRpcServices().getServer().getRowCache(); + rowCache.evictRowsByRegion(this); + } + /** Wait for all current flushes and compactions of the region to complete */ // TODO HBASE-18906. 
Check the usage (if any) in Phoenix and expose this or give alternate way for // Phoenix needs. @@ -3259,8 +3285,8 @@ public RegionScannerImpl getScanner(Scan scan) throws IOException { return getScanner(scan, null); } - RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results) - throws IOException { + RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results, + RpcCallContext context) throws IOException { if (!rowCache.canCacheRow(get, this)) { return getScannerWithResults(scan, results); } @@ -3268,12 +3294,23 @@ RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results) // Try get from row cache RowCacheKey key = new RowCacheKey(this, get.getRow()); if (rowCache.tryGetFromCache(key, get, results)) { + addReadRequestsCount(1); + if (getMetrics() != null) { + getMetrics().updateReadRequestCount(); + } + // The cache was hit, so no scanner is created return null; } RegionScannerImpl scanner = getScannerWithResults(scan, results); - rowCache.populateCache(results, key); + + // When the results came from the memstore only, do not populate the row cache + boolean readFromMemStoreOnly = context.getBlockBytesScanned() < 1; + if (!readFromMemStoreOnly) { + rowCache.populateCache(this, results, key); + } + return scanner; } @@ -3435,6 +3472,15 @@ private void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, b @Override public void put(Put put) throws IOException { TraceUtil.trace(() -> { + // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot + // track TTL expiration + if (isRowCacheEnabled) { + if (put.getTTL() != Long.MAX_VALUE) { + throw new DoNotRetryIOException( + "Tables with row cache enabled do not allow setting TTL on Puts"); + } + } + checkReadOnly(); // Do a rough check that we have resources to accept a write. The check is @@ -4811,7 +4857,12 @@ public OperationStatus[] batchMutate(Mutation[] mutations, boolean atomic, long // checkAndMutate. // * coprocessor calls (see ex. BulkDeleteEndpoint). // So nonces are not really ever used by HBase. They could be by coprocs, and checkAnd... 
- return batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce)); + if (rowCache == null) { + return batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce)); + } + + return rowCache.mutateWithRowCacheBarrier(this, Arrays.asList(mutations), + () -> batchMutate(new MutationBatchOperation(this, mutations, atomic, nonceGroup, nonce))); } @Override @@ -4823,10 +4874,9 @@ public OperationStatus[] batchMutate(Mutation[] mutations) throws IOException { } OperationStatus[] batchMutate(Mutation[] mutations, boolean atomic) throws IOException { - OperationStatus[] operationStatuses = - rowCache.mutateWithRowCacheBarrier(this, Arrays.asList(mutations), - () -> this.batchMutate(mutations, atomic, HConstants.NO_NONCE, HConstants.NO_NONCE)); - return TraceUtil.trace(() -> operationStatuses, () -> createRegionSpan("Region.batchMutate")); + return TraceUtil.trace( + () -> batchMutate(mutations, atomic, HConstants.NO_NONCE, HConstants.NO_NONCE), + () -> createRegionSpan("Region.batchMutate")); } /** @@ -5111,8 +5161,17 @@ public CheckAndMutateResult checkAndMutate(CheckAndMutate checkAndMutate) throws public CheckAndMutateResult checkAndMutate(CheckAndMutate checkAndMutate, long nonceGroup, long nonce) throws IOException { - CheckAndMutateResult checkAndMutateResult = rowCache.mutateWithRowCacheBarrier(this, - checkAndMutate.getRow(), () -> this.checkAndMutate(checkAndMutate, nonceGroup, nonce)); + CheckAndMutateResult checkAndMutateResult = + rowCache.mutateWithRowCacheBarrier(this, checkAndMutate.getRow(), + () -> this.checkAndMutateInternal(checkAndMutate, nonceGroup, nonce)); return TraceUtil.trace(() -> checkAndMutateResult, + () -> createRegionSpan("Region.checkAndMutate")); + } + + public CheckAndMutateResult checkAndMutate(List<Mutation> mutations, + CheckAndMutate checkAndMutate, long nonceGroup, long nonce) throws IOException { + CheckAndMutateResult checkAndMutateResult = rowCache.mutateWithRowCacheBarrier(this, mutations, + () -> this.checkAndMutateInternal(checkAndMutate, nonceGroup, nonce)); + return TraceUtil.trace(() -> checkAndMutateResult, () -> createRegionSpan("Region.checkAndMutate")); } @@ -5312,6 +5371,10 @@ private OperationStatus mutate(Mutation mutation, boolean atomic) throws IOExcep private OperationStatus mutate(Mutation mutation, boolean atomic, long nonceGroup, long nonce) throws IOException { + if (rowCache == null) { + return this.mutateInternal(mutation, atomic, nonceGroup, nonce); + } + return rowCache.mutateWithRowCacheBarrier(this, mutation.getRow(), () -> this.mutateInternal(mutation, atomic, nonceGroup, nonce)); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java index c8f7f96a033b..b4dabf7fb3bf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java @@ -69,6 +69,7 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { private BlockCache l2Cache = null; private MobFileCache mobFileCache; private CacheStats cacheStats; + private final RowCache rowCache; private CacheStats l1Stats = null; private CacheStats l2Stats = null; private volatile long numWALFiles = 0; @@ -99,6 +100,8 @@ public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { this.regionServer = 
regionServer; initBlockCache(); initMobFileCache(); + RSRpcServices rsRpcServices = this.regionServer.getRSRpcServices(); + this.rowCache = rsRpcServices == null ? null : rsRpcServices.getServer().getRowCache(); this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, @@ -1194,6 +1197,31 @@ public long getTrailerHitCount() { return this.cacheStats != null ? this.cacheStats.getTrailerHitCount() : 0L; } + @Override + public long getRowCacheHitCount() { + return this.rowCache != null ? this.rowCache.getHitCount() : 0L; + } + + @Override + public long getRowCacheMissCount() { + return this.rowCache != null ? this.rowCache.getMissCount() : 0L; + } + + @Override + public long getRowCacheSize() { + return this.rowCache != null ? this.rowCache.getSize() : 0L; + } + + @Override + public long getRowCacheCount() { + return this.rowCache != null ? this.rowCache.getCount() : 0L; + } + + @Override + public long getRowCacheEvictedRowCount() { + return this.rowCache != null ? this.rowCache.getEvictedRowCount() : 0L; + } + @Override public long getByteBuffAllocatorHeapAllocationBytes() { return ByteBuffAllocator.getHeapAllocationBytes(allocator, ByteBuffAllocator.HEAP); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 35371cb74ae7..7a21ab8a5504 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -668,7 +668,7 @@ private CheckAndMutateResult checkAndMutate(HRegion region, List{@code RowCache} coordinates cache access for Get operations and - * enforces cache consistency during mutations. It delegates actual - * storage and eviction policy decisions (e.g., LRU, LFU) to a - * {@link RowCacheStrategy} implementation.
- * - * <p>This class is responsible for: - * <ul> - * <li>Determining whether row cache is enabled for a region</li> - * <li>Attempting cache lookups before falling back to the normal read path</li> - * <li>Populating the cache after successful reads</li> - * <li>Evicting affected rows on mutations to maintain correctness</li> - * </ul> - * - * <p>{@code RowCache} does not implement caching policy or storage directly; - * those concerns are encapsulated by {@code RowCacheStrategy}. + * <p> + * {@code RowCache} coordinates cache access for Get operations and enforces cache consistency + * during mutations. It delegates actual storage and eviction policy decisions (e.g., LRU, LFU) to a + * {@link RowCacheStrategy} implementation. + * </p> + * <p> + * This class is responsible for: + * <ul> + * <li>Determining whether row cache is enabled for a region</li> + * <li>Attempting cache lookups before falling back to the normal read path</li> + * <li>Populating the cache after successful reads</li> + * <li>Evicting affected rows on mutations to maintain correctness</li> + * </ul> + * </p> + * <p> + * {@code RowCache} does not implement caching policy or storage directly; those concerns are + * encapsulated by {@code RowCacheStrategy}. + * </p>
+ */ @org.apache.yetus.audience.InterfaceAudience.Private public class RowCache { + /** + * A barrier that prevents the row cache from being populated during region operations, such as + * bulk loads. It is implemented as a counter to address issues that arise when the same region is + * updated concurrently. + */ + private final Map<HRegion, AtomicInteger> regionLevelBarrierMap = new ConcurrentHashMap<>(); + /** + * A barrier that prevents the row cache from being populated during row mutations. It is + * implemented as a counter to address issues that arise when the same row is mutated + * concurrently. + */ + private final Map<RowCacheKey, AtomicInteger> rowLevelBarrierMap = new ConcurrentHashMap<>(); + private final boolean enabledByConf; private final RowCacheStrategy rowCacheStrategy; @@ -63,8 +85,8 @@ <R> R execute(RowOperation<R> operation) throws IOException { RowCache(Configuration conf) { enabledByConf = conf.getFloat(HConstants.ROW_CACHE_SIZE_KEY, HConstants.ROW_CACHE_SIZE_DEFAULT) > 0; - // TODO: implement row cache - rowCacheStrategy = null; + // Currently we only support the TinyLfu implementation + rowCacheStrategy = new TinyLfuRowCacheStrategy(MemorySizeUtil.getRowCacheSize(conf)); } <R> R mutateWithRowCacheBarrier(HRegion region, byte[] row, RowOperation<R> operation) @@ -74,9 +96,39 @@ <R> R mutateWithRowCacheBarrier(HRegion region, byte[] row, RowOperation<R> oper } RowCacheKey key = new RowCacheKey(region, row); - // TODO: implement mutate with row cache barrier logic - evictRow(key); - return execute(operation); + try { + // Creates a barrier that prevents the row cache from being populated for this row + // during mutation. Reads for the row can instead be served from HFiles or the block cache. + createRowLevelBarrier(key); + + // After creating the barrier, evict the existing row cache for this row, + // as it becomes invalid after the mutation + evictRow(key); + + return execute(operation); + } finally { + // Remove the barrier after mutation to allow the row cache to be populated again + removeRowLevelBarrier(key); + } + } + + /** + * Remove the barrier after mutation to allow the row cache to be populated again + * @param key the cache key of the row + */ + void removeRowLevelBarrier(RowCacheKey key) { + rowLevelBarrierMap.computeIfPresent(key, (k, counter) -> { + int remaining = counter.decrementAndGet(); + return (remaining <= 0) ? null : counter; + }); + } + + /** + * Creates a barrier to prevent the row cache from being populated for this row during mutation + * @param key the cache key of the row + */ + void createRowLevelBarrier(RowCacheKey key) { + rowLevelBarrierMap.computeIfAbsent(key, k -> new AtomicInteger(0)).incrementAndGet(); } <R> R mutateWithRowCacheBarrier(HRegion region, List<Mutation> mutations, @@ -85,21 +137,88 @@ <R> R mutateWithRowCacheBarrier(HRegion region, List<Mutation> mutations, return operation.execute(); } - // TODO: implement mutate with row cache barrier logic Set<RowCacheKey> rowCacheKeys = new HashSet<>(mutations.size()); - mutations.forEach(mutation -> rowCacheKeys.add(new RowCacheKey(region, mutation.getRow()))); - rowCacheKeys.forEach(this::evictRow); + try { + // Evict the row cache entries of all rows touched by the mutations + mutations.forEach(mutation -> rowCacheKeys.add(new RowCacheKey(region, mutation.getRow()))); + rowCacheKeys.forEach(key -> { + // Creates a barrier that prevents the row cache from being populated for this row + // during mutation. Reads for the row can instead be served from HFiles or the block cache. 
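+ // For example: with two overlapping mutations of the same row, the per-key counter is + // raised to 2; populateCache() skips the row while the mapping exists, and the mapping is + // only removed once removeRowLevelBarrier() has decremented the counter back to 0.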
+ createRowLevelBarrier(key); - return execute(operation); + // After creating the barrier, evict the existing row cache for this row, + // as it becomes invalid after the mutation + evictRow(key); + }); + + return execute(operation); + } finally { + // Remove the barrier after mutation to allow the row cache to be populated again + rowCacheKeys.forEach(this::removeRowLevelBarrier); + } } void evictRow(RowCacheKey key) { rowCacheStrategy.evictRow(key); } + void evictRowsByRegion(HRegion region) { + rowCacheStrategy.evictRowsByRegion(region); + } + + // @formatter:off + /** + * Row cache is only enabled when the following conditions are met: + * - Row cache is enabled at the table level. + * - Cache blocks is enabled in the get request. + * - A Get object cannot be distinguished from others except by its row key. + * So we check equality for the following: + * - filter + * - retrieving cells + * - TTL + * - attributes + * - CheckExistenceOnly + * - ColumnFamilyTimeRange + * - Consistency + * - MaxResultsPerColumnFamily + * - ReplicaId + * - RowOffsetPerColumnFamily + * @param get the Get request + * @param region the Region + * @return true if the row can be cached, false otherwise + */ + // @formatter:on boolean canCacheRow(Get get, Region region) { - // TODO: implement logic to determine if the row can be cached - return false; + return enabledByConf && region.isRowCacheEnabled() && get.getCacheBlocks() + && get.getFilter() == null && isRetrieveAllCells(get, region) && isDefaultTtl(region) + && get.getAttributesMap().isEmpty() && !get.isCheckExistenceOnly() + && get.getColumnFamilyTimeRange().isEmpty() && get.getConsistency() == Consistency.STRONG + && get.getMaxResultsPerColumnFamily() == -1 && get.getReplicaId() == -1 + && get.getRowOffsetPerColumnFamily() == 0 && get.getTimeRange().isAllTime(); + } + + private static boolean isRetrieveAllCells(Get get, Region region) { + if (region.getTableDescriptor().getColumnFamilyCount() != get.numFamilies()) { + return false; + } + + boolean hasQualifier = get.getFamilyMap().values().stream().anyMatch(Objects::nonNull); + return !hasQualifier; + } + + private static boolean isDefaultTtl(Region region) { + return Arrays.stream(region.getTableDescriptor().getColumnFamilies()) + .allMatch(cfd -> cfd.getTimeToLive() == ColumnFamilyDescriptorBuilder.DEFAULT_TTL); + } + + // For testing only + public RowCells getRow(RowCacheKey key) { + return getRow(key, true); + } + + // For testing only + RowCells getRow(RowCacheKey key, boolean caching) { + return rowCacheStrategy.getRow(key, caching); } boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) { @@ -110,16 +229,67 @@ boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) { } results.addAll(row.getCells()); - // TODO: implement update of metrics return true; } - void populateCache(List<Cell> results, RowCacheKey key) { - // TODO: implement with barrier to avoid cache read during mutation - try { - rowCacheStrategy.cacheRow(key, new RowCells(results)); - } catch (CloneNotSupportedException ignored) { - // Not able to cache row cells, ignore - } + void populateCache(HRegion region, List<Cell> results, RowCacheKey key) { + // The row cache is populated only when no region level barriers remain + regionLevelBarrierMap.computeIfAbsent(region, t -> { + // The row cache is populated only when no row level barriers remain + rowLevelBarrierMap.computeIfAbsent(key, k -> { + try { + rowCacheStrategy.cacheRow(key, new RowCells(results)); + } catch (CloneNotSupportedException ignored) { + // Not able to cache 
row cells, ignore + } + return null; + }); + return null; + }); + } + + void createRegionLevelBarrier(HRegion region) { + regionLevelBarrierMap.computeIfAbsent(region, k -> new AtomicInteger(0)).incrementAndGet(); + } + + void increaseRowCacheSeqNum(HRegion region) { + region.increaseRowCacheSeqNum(); + } + + void removeTableLevelBarrier(HRegion region) { + regionLevelBarrierMap.computeIfPresent(region, (k, counter) -> { + int remaining = counter.decrementAndGet(); + return (remaining <= 0) ? null : counter; + }); + } + + long getHitCount() { + return rowCacheStrategy.getHitCount(); + } + + long getMissCount() { + return rowCacheStrategy.getMissCount(); + } + + long getSize() { + return rowCacheStrategy.getSize(); + } + + long getCount() { + return rowCacheStrategy.getCount(); + } + + long getEvictedRowCount() { + return rowCacheStrategy.getEvictedRowCount(); + } + + // For testing only + AtomicInteger getRowLevelBarrier(RowCacheKey key) { + return rowLevelBarrierMap.get(key); + } + + // For testing only + AtomicInteger getRegionLevelBarrier(HRegion region) { + return regionLevelBarrierMap.get(region); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java index 2f44058e0a24..af0a0ea4c537 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java @@ -39,8 +39,7 @@ public RowCells(List<Cell> cells) throws CloneNotSupportedException { // To garbage collect the objects referenced by the cells this.cells.add(extCell.deepClone()); } catch (RuntimeException e) { - // throw new CloneNotSupportedException("Deep clone failed"); - this.cells.add(extCell); + throw new CloneNotSupportedException("Deep clone failed"); } } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java new file mode 100644 index 000000000000..e141bd3cbb2b --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java @@ -0,0 +1,113 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ +package org.apache.hadoop.hbase.regionserver; + +import com.github.benmanes.caffeine.cache.Cache; +import com.github.benmanes.caffeine.cache.Caffeine; +import com.github.benmanes.caffeine.cache.Policy; +import com.github.benmanes.caffeine.cache.RemovalCause; +import com.github.benmanes.caffeine.cache.RemovalListener; +import java.util.Optional; +import java.util.OptionalLong; +import java.util.concurrent.atomic.LongAdder; +import org.checkerframework.checker.nullness.qual.NonNull; + +@org.apache.yetus.audience.InterfaceAudience.Private +public class TinyLfuRowCacheStrategy implements RowCacheStrategy { + private final class EvictionListener + implements RemovalListener<@NonNull RowCacheKey, @NonNull RowCells> { + @Override + public void onRemoval(RowCacheKey key, RowCells value, @NonNull RemovalCause cause) { + evictedRowCount.increment(); + } + } + + private final Cache<@NonNull RowCacheKey, RowCells> cache; + + // Cache.stats() does not provide eviction count for entries, so we maintain our own counter. + private final LongAdder evictedRowCount = new LongAdder(); + + TinyLfuRowCacheStrategy(long maxSizeBytes) { + if (maxSizeBytes <= 0) { + cache = Caffeine.newBuilder().maximumSize(0).build(); + return; + } + + cache = + Caffeine.newBuilder().maximumWeight(maxSizeBytes).removalListener(new EvictionListener()) + .weigher((RowCacheKey key, + RowCells value) -> (int) Math.min(key.heapSize() + value.heapSize(), Integer.MAX_VALUE)) + .recordStats().build(); + } + + @Override + public void cacheRow(RowCacheKey key, RowCells value) { + cache.put(key, value); + } + + @Override + public void evictRow(RowCacheKey key) { + cache.asMap().remove(key); + } + + @Override + public void evictRowsByRegion(HRegion region) { + cache.asMap().keySet().removeIf(key -> key.isSameRegion(region)); + } + + @Override + public long getCount() { + return cache.estimatedSize(); + } + + @Override + public long getEvictedRowCount() { + return evictedRowCount.sum(); + } + + @Override + public long getHitCount() { + return cache.stats().hitCount(); + } + + @Override + public long getMaxSize() { + Optional<Long> result = cache.policy().eviction().map(Policy.Eviction::getMaximum); + return result.orElse(-1L); + } + + @Override + public long getMissCount() { + return cache.stats().missCount(); + } + + @Override + public RowCells getRow(RowCacheKey key, boolean caching) { + if (!caching) { + return null; + } + + return cache.getIfPresent(key); + } + + @Override + public long getSize() { + Optional<OptionalLong> result = cache.policy().eviction().map(Policy.Eviction::weightedSize); + return result.orElse(OptionalLong.of(-1L)).orElse(-1L); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java index f1b6efe50a99..6b677f2d1223 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperStub.java @@ -662,6 +662,31 @@ public long getTrailerHitCount() { return 0; } + @Override + public long getRowCacheHitCount() { + return 2; + } + + @Override + public long getRowCacheMissCount() { + return 1; + } + + @Override + public long getRowCacheEvictedRowCount() { + return 0; + } + + @Override + public long getRowCacheSize() { + return 1; + } + + @Override + public long getRowCacheCount() { + return 2; + } + @Override public int 
getSplitQueueSize() { return 0; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java index aac2a5922b9b..76c2a8ad6e42 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestMetricsRegionServer.java @@ -148,6 +148,11 @@ public void testWrapperSource() { HELPER.assertGauge("l2CacheHitRatio", 90, serverSource); HELPER.assertGauge("l2CacheMissRatio", 10, serverSource); HELPER.assertCounter("updatesBlockedTime", 419, serverSource); + HELPER.assertCounter("rowCacheHitCount", 2, serverSource); + HELPER.assertCounter("rowCacheMissCount", 1, serverSource); + HELPER.assertCounter("rowCacheEvictedRowCount", 0, serverSource); + HELPER.assertGauge("rowCacheSize", 1, serverSource); + HELPER.assertGauge("rowCacheCount", 2, serverSource); } @Test diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java new file mode 100644 index 000000000000..c4ca0d70faff --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java @@ -0,0 +1,547 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
*/ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_EVICTED_ROW_COUNT; +import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_HIT_COUNT; +import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_MISS_COUNT; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CompatibilityFactory; +import org.apache.hadoop.hbase.DoNotRetryIOException; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.SingleProcessHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.Append; +import org.apache.hadoop.hbase.client.CheckAndMutate; +import org.apache.hadoop.hbase.client.CheckAndMutateResult; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Row; +import org.apache.hadoop.hbase.client.RowMutations; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.test.MetricsAssertHelper; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +@Category({ RegionServerTests.class, MediumTests.class }) +public class TestRowCache { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCache.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static final byte[] CF1 = Bytes.toBytes("cf1"); + private static final byte[] CF2 = Bytes.toBytes("cf2"); + private static final byte[] Q1 = Bytes.toBytes("q1"); + private static final byte[] Q2 = Bytes.toBytes("q2"); + + private static MetricsAssertHelper metricsHelper; + private static MetricsRegionServer regionServerMetrics; + private static MetricsRegionServerSource serverSource; + + private static Admin admin; + private static RowCache rowCache; + + private TableName tableName; + private Table table; + HRegion region; + private final Map<String, Long> counterBase = new HashMap<>(); + + @Rule + public TestName testName = 
new TestName(); + + @BeforeClass + public static void beforeClass() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + + // Enable row cache but reduce the block cache size to fit in 80% of the heap + conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f); + conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f); + + SingleProcessHBaseCluster cluster = TEST_UTIL.startMiniCluster(); + cluster.waitForActiveAndReadyMaster(); + admin = TEST_UTIL.getAdmin(); + + metricsHelper = CompatibilityFactory.getInstance(MetricsAssertHelper.class); + HRegionServer regionServer = cluster.getRegionServer(0); + regionServerMetrics = regionServer.getMetrics(); + serverSource = regionServerMetrics.getMetricsSource(); + + rowCache = regionServer.getRSRpcServices().getServer().getRowCache(); + } + + @AfterClass + public static void afterClass() throws Exception { + HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false; + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void beforeTestMethod() throws Exception { + ColumnFamilyDescriptor cf1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + // To test data block encoding + ColumnFamilyDescriptor cf2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2) + .setDataBlockEncoding(DataBlockEncoding.FAST_DIFF).build(); + + tableName = TableName.valueOf(testName.getMethodName()); + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName).setRowCacheEnabled(true) + .setColumnFamily(cf1).setColumnFamily(cf2).build(); + admin.createTable(td); + table = admin.getConnection().getTable(tableName); + region = TEST_UTIL.getRSForFirstRegionInTable(tableName).getRegions().stream() + .filter(r -> r.getRegionInfo().getTable().equals(tableName)).findFirst().orElseThrow(); + } + + @After + public void afterTestMethod() throws Exception { + counterBase.clear(); + + admin.disableTable(tableName); + admin.deleteTable(tableName); + } + + private void setCounterBase(String metric, long value) { + counterBase.put(metric, value); + } + + private void assertCounterDiff(String metric, long diff) { + Long base = counterBase.get(metric); + if (base == null) { + throw new IllegalStateException( + "base counter of " + metric + " metric should have been set before by setCounterBase()"); + } + long newValue = base + diff; + metricsHelper.assertCounter(metric, newValue, serverSource); + counterBase.put(metric, newValue); + } + + private static void recomputeMetrics() { + regionServerMetrics.getRegionServerWrapper().forceRecompute(); + } + + @Test + public void testGetWithRowCache() throws IOException { + byte[] rowKey = "row".getBytes(); + Get get = new Get(rowKey); + Result result; + + RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey); + + // Initialize metrics + recomputeMetrics(); + setCounterBase("Get_num_ops", metricsHelper.getCounter("Get_num_ops", serverSource)); + setCounterBase(ROW_CACHE_HIT_COUNT, + metricsHelper.getCounter(ROW_CACHE_HIT_COUNT, serverSource)); + setCounterBase(ROW_CACHE_MISS_COUNT, + metricsHelper.getCounter(ROW_CACHE_MISS_COUNT, serverSource)); + setCounterBase(ROW_CACHE_EVICTED_ROW_COUNT, + metricsHelper.getCounter(ROW_CACHE_EVICTED_ROW_COUNT, serverSource)); + + // Put a row + Put put = new Put(rowKey); + put.addColumn(CF1, Q1, Bytes.toBytes(0L)); + put.addColumn(CF1, Q2, "12".getBytes()); + put.addColumn(CF2, Q1, "21".getBytes()); + put.addColumn(CF2, Q2, "22".getBytes()); + table.put(put); + admin.flush(tableName); + recomputeMetrics(); + assertCounterDiff(ROW_CACHE_HIT_COUNT, 0); + assertCounterDiff(ROW_CACHE_MISS_COUNT, 0); + 
assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0); + + // First get to populate the row cache + result = table.get(get); + recomputeMetrics(); + assertArrayEquals(rowKey, result.getRow()); + assertArrayEquals(Bytes.toBytes(0L), result.getValue(CF1, Q1)); + assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2)); + assertArrayEquals("21".getBytes(), result.getValue(CF2, Q1)); + assertArrayEquals("22".getBytes(), result.getValue(CF2, Q2)); + assertCounterDiff("Get_num_ops", 1); + // Ensure the get operation from HFile without row cache + assertCounterDiff(ROW_CACHE_HIT_COUNT, 0); + assertCounterDiff(ROW_CACHE_MISS_COUNT, 1); + assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0); + + // Get from the row cache + result = table.get(get); + recomputeMetrics(); + assertArrayEquals(rowKey, result.getRow()); + assertArrayEquals(Bytes.toBytes(0L), result.getValue(CF1, Q1)); + assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2)); + assertArrayEquals("21".getBytes(), result.getValue(CF2, Q1)); + assertArrayEquals("22".getBytes(), result.getValue(CF2, Q2)); + assertCounterDiff("Get_num_ops", 1); + // Ensure the get operation from the row cache + assertCounterDiff(ROW_CACHE_HIT_COUNT, 1); + assertCounterDiff(ROW_CACHE_MISS_COUNT, 0); + assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0); + + // Row cache is invalidated by the put operation + assertNotNull(rowCache.getRow(rowCacheKey)); + table.put(put); + recomputeMetrics(); + assertCounterDiff(ROW_CACHE_HIT_COUNT, 1); + assertCounterDiff(ROW_CACHE_MISS_COUNT, 0); + assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 1); + + // Get is executed without the row cache; however, the cache is re-populated as a result + result = table.get(get); + recomputeMetrics(); + assertArrayEquals(rowKey, result.getRow()); + assertCounterDiff("Get_num_ops", 1); + // Ensure the get operation not from the row cache + assertCounterDiff(ROW_CACHE_HIT_COUNT, 0); + assertCounterDiff(ROW_CACHE_MISS_COUNT, 1); + assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0); + + // Get again with the row cache + result = table.get(get); + recomputeMetrics(); + assertArrayEquals(rowKey, result.getRow()); + assertCounterDiff("Get_num_ops", 1); + // Ensure the get operation from the row cache + assertCounterDiff(ROW_CACHE_HIT_COUNT, 1); + assertCounterDiff(ROW_CACHE_MISS_COUNT, 0); + assertCounterDiff(ROW_CACHE_EVICTED_ROW_COUNT, 0); + + // Row cache is invalidated by the increment operation + assertNotNull(rowCache.getRow(rowCacheKey)); + table.incrementColumnValue(rowKey, CF1, Q1, 1); + assertNull(rowCache.getRow(rowCacheKey)); + + // Get is executed without the row cache; however, the cache is re-populated as a result + table.get(get); + assertNotNull(rowCache.getRow(rowCacheKey)); + + // Row cache is invalidated by the append operation + assertNotNull(rowCache.getRow(rowCacheKey)); + Append append = new Append(rowKey); + append.addColumn(CF1, Q1, Bytes.toBytes(0L)); + table.append(append); + assertNull(rowCache.getRow(rowCacheKey)); + + // Get is executed without the row cache; however, the cache is re-populated as a result + table.get(get); + assertNotNull(rowCache.getRow(rowCacheKey)); + + // Row cache is invalidated by the delete operation + assertNotNull(rowCache.getRow(rowCacheKey)); + Delete delete = new Delete(rowKey); + delete.addColumn(CF1, Q1); + table.delete(delete); + assertNull(rowCache.getRow(rowCacheKey)); + } + + @Test(expected = DoNotRetryIOException.class) + public void testPutWithTTL() throws IOException { + // Put with TTL is not allowed on tables with row cache 
enabled, because cached rows cannot + // track TTL expiration + Put put = new Put("row".getBytes()); + put.addColumn(CF1, Q1, "11".getBytes()); + put.setTTL(1); + table.put(put); + } + + @Test + public void testCheckAndMutate() throws IOException { + byte[] rowKey = "row".getBytes(); + Get get = new Get(rowKey); + Result result; + CheckAndMutate cam; + CheckAndMutateResult camResult; + + RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey); + + // Put a row + Put put1 = new Put(rowKey); + put1.addColumn(CF1, Q1, "11".getBytes()); + put1.addColumn(CF1, Q2, "12".getBytes()); + table.put(put1); + admin.flush(tableName); + + // Validate that the row cache is populated + result = table.get(get); + assertNotNull(rowCache.getRow(rowCacheKey)); + assertArrayEquals("11".getBytes(), result.getValue(CF1, Q1)); + assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2)); + + // The row cache is evicted even when the checkAndMutate operation fails, because the + // eviction happens before the check + Put put2 = new Put(rowKey); + put2.addColumn(CF1, Q2, "1212".getBytes()); + cam = CheckAndMutate.newBuilder(rowKey).ifEquals(CF1, Q2, "00".getBytes()).build(put2); + camResult = table.checkAndMutate(cam); + assertFalse(camResult.isSuccess()); + assertNull(rowCache.getRow(rowCacheKey)); + + // Validate that the row cache is populated + result = table.get(get); + assertNotNull(rowCache.getRow(rowCacheKey)); + assertArrayEquals("11".getBytes(), result.getValue(CF1, Q1)); + assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2)); + + // The row cache is invalidated by a checkAndMutate operation + cam = CheckAndMutate.newBuilder(rowKey).ifEquals(CF1, Q2, "12".getBytes()).build(put2); + camResult = table.checkAndMutate(cam); + assertTrue(camResult.isSuccess()); + assertNull(rowCache.getRow(rowCacheKey)); + } + + @Test + public void testCheckAndMutates() throws IOException { + byte[] rowKey1 = "row1".getBytes(); + byte[] rowKey2 = "row2".getBytes(); + Get get1 = new Get(rowKey1); + Get get2 = new Get(rowKey2); + Result result1, result2; + List<CheckAndMutate> cams; + List<CheckAndMutateResult> camResults; + + RowCacheKey rowCacheKey1 = new RowCacheKey(region, rowKey1); + RowCacheKey rowCacheKey2 = new RowCacheKey(region, rowKey2); + + // Put rows + Put put1 = new Put(rowKey1); + put1.addColumn(CF1, Q1, "111".getBytes()); + put1.addColumn(CF1, Q2, "112".getBytes()); + table.put(put1); + Put put2 = new Put(rowKey2); + put2.addColumn(CF1, Q1, "211".getBytes()); + put2.addColumn(CF1, Q2, "212".getBytes()); + table.put(put2); + admin.flush(tableName); + + // Validate that the row caches are populated + result1 = table.get(get1); + assertNotNull(rowCache.getRow(rowCacheKey1)); + assertArrayEquals("111".getBytes(), result1.getValue(CF1, Q1)); + assertArrayEquals("112".getBytes(), result1.getValue(CF1, Q2)); + result2 = table.get(get2); + assertNotNull(rowCache.getRow(rowCacheKey2)); + assertArrayEquals("211".getBytes(), result2.getValue(CF1, Q1)); + assertArrayEquals("212".getBytes(), result2.getValue(CF1, Q2)); + + // The row caches are invalidated by checkAndMutate operations + cams = new ArrayList<>(); + cams.add(CheckAndMutate.newBuilder(rowKey1).ifEquals(CF1, Q2, "112".getBytes()).build(put1)); + cams.add(CheckAndMutate.newBuilder(rowKey2).ifEquals(CF1, Q2, "212".getBytes()).build(put2)); + camResults = table.checkAndMutate(cams); + assertTrue(camResults.get(0).isSuccess()); + assertTrue(camResults.get(1).isSuccess()); + assertNull(rowCache.getRow(rowCacheKey1)); + assertNull(rowCache.getRow(rowCacheKey2)); + } + + @Test + public void testRowMutations() throws IOException { + byte[] 
rowKey1 = "row1".getBytes(); + byte[] rowKey2 = "row2".getBytes(); + Get get1 = new Get(rowKey1); + Get get2 = new Get(rowKey2); + Result result1, result2; + + RowCacheKey rowCacheKey1 = new RowCacheKey(region, rowKey1); + RowCacheKey rowCacheKey2 = new RowCacheKey(region, rowKey2); + + // Put rows + Put put1 = new Put(rowKey1); + put1.addColumn(CF1, Q1, "111".getBytes()); + put1.addColumn(CF1, Q2, "112".getBytes()); + table.put(put1); + Put put2 = new Put(rowKey2); + put2.addColumn(CF1, Q1, "211".getBytes()); + put2.addColumn(CF1, Q2, "212".getBytes()); + table.put(put2); + admin.flush(tableName); + + // Validate that the row caches are populated + result1 = table.get(get1); + assertNotNull(rowCache.getRow(rowCacheKey1)); + assertArrayEquals("111".getBytes(), result1.getValue(CF1, Q1)); + assertArrayEquals("112".getBytes(), result1.getValue(CF1, Q2)); + result2 = table.get(get2); + assertNotNull(rowCache.getRow(rowCacheKey2)); + assertArrayEquals("211".getBytes(), result2.getValue(CF1, Q1)); + assertArrayEquals("212".getBytes(), result2.getValue(CF1, Q2)); + + // The row cache for row1 is invalidated by the checkAndMutate with RowMutations + Put put12 = new Put(rowKey1); + put12.addColumn(CF1, Q1, "111111".getBytes()); + Put put13 = new Put(rowKey1); + put13.addColumn(CF1, Q2, "112112".getBytes()); + RowMutations rms = new RowMutations(rowKey1); + rms.add(put12); + rms.add(put13); + CheckAndMutate cam = + CheckAndMutate.newBuilder(rowKey1).ifEquals(CF1, Q1, "111".getBytes()).build(rms); + table.checkAndMutate(cam); + assertNull(rowCache.getRow(rowCacheKey1)); + assertNotNull(rowCache.getRow(rowCacheKey2)); + + // Validate that the row caches are populated + result1 = table.get(get1); + assertNotNull(rowCache.getRow(rowCacheKey1)); + assertArrayEquals("111111".getBytes(), result1.getValue(CF1, Q1)); + assertArrayEquals("112112".getBytes(), result1.getValue(CF1, Q2)); + result2 = table.get(get2); + assertNotNull(rowCache.getRow(rowCacheKey2)); + assertArrayEquals("211".getBytes(), result2.getValue(CF1, Q1)); + assertArrayEquals("212".getBytes(), result2.getValue(CF1, Q2)); + } + + @Test + public void testBatch() throws IOException, InterruptedException { + byte[] rowKey1 = "row1".getBytes(); + byte[] rowKey2 = "row2".getBytes(); + byte[] rowKey3 = "row3".getBytes(); + Get get1 = new Get(rowKey1); + Get get2 = new Get(rowKey2); + Get get3 = new Get(rowKey3); + List<Row> batchOperations; + Object[] results; + + RowCacheKey rowCacheKey1 = new RowCacheKey(region, rowKey1); + RowCacheKey rowCacheKey2 = new RowCacheKey(region, rowKey2); + RowCacheKey rowCacheKey3 = new RowCacheKey(region, rowKey3); + + // Put rows + batchOperations = new ArrayList<>(); + Put put1 = new Put(rowKey1); + put1.addColumn(CF1, Q1, "111".getBytes()); + put1.addColumn(CF1, Q2, "112".getBytes()); + batchOperations.add(put1); + Put put2 = new Put(rowKey2); + put2.addColumn(CF1, Q1, "211".getBytes()); + put2.addColumn(CF1, Q2, "212".getBytes()); + batchOperations.add(put2); + Put put3 = new Put(rowKey3); + put3.addColumn(CF1, Q1, "311".getBytes()); + put3.addColumn(CF1, Q2, "312".getBytes()); + batchOperations.add(put3); + results = new Result[batchOperations.size()]; + table.batch(batchOperations, results); + admin.flush(tableName); + + // Validate that the row caches are populated + batchOperations = new ArrayList<>(); + batchOperations.add(get1); + batchOperations.add(get2); + batchOperations.add(get3); + results = new Object[batchOperations.size()]; + table.batch(batchOperations, results); + assertEquals(3, results.length); + 
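+ // Every Get above was served from the flushed HFile (block bytes were scanned), so all + // three rows should now be populated in the row cache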
assertNotNull(rowCache.getRow(rowCacheKey1)); + assertArrayEquals("111".getBytes(), ((Result) results[0]).getValue(CF1, Q1)); + assertArrayEquals("112".getBytes(), ((Result) results[0]).getValue(CF1, Q2)); + assertNotNull(rowCache.getRow(rowCacheKey2)); + assertArrayEquals("211".getBytes(), ((Result) results[1]).getValue(CF1, Q1)); + assertArrayEquals("212".getBytes(), ((Result) results[1]).getValue(CF1, Q2)); + assertNotNull(rowCache.getRow(rowCacheKey3)); + assertArrayEquals("311".getBytes(), ((Result) results[2]).getValue(CF1, Q1)); + assertArrayEquals("312".getBytes(), ((Result) results[2]).getValue(CF1, Q2)); + + // The row caches are invalidated by batch operation + batchOperations = new ArrayList<>(); + batchOperations.add(put1); + Put put2New = new Put(rowKey2); + put2New.addColumn(CF1, Q1, "211211".getBytes()); + put2New.addColumn(CF1, Q2, "212".getBytes()); + CheckAndMutate cam = + CheckAndMutate.newBuilder(rowKey2).ifEquals(CF1, Q1, "211".getBytes()).build(put2New); + batchOperations.add(cam); + results = new Object[batchOperations.size()]; + table.batch(batchOperations, results); + assertEquals(2, results.length); + assertNull(rowCache.getRow(rowCacheKey1)); + assertNull(rowCache.getRow(rowCacheKey2)); + assertNotNull(rowCache.getRow(rowCacheKey3)); + } + + @Test + public void testGetFromMemstoreOnly() throws IOException, InterruptedException { + byte[] rowKey = "row".getBytes(); + RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey); + + // Put a row into memstore only, not flushed to HFile yet + Put put = new Put(rowKey); + put.addColumn(CF1, Q1, Bytes.toBytes(0L)); + table.put(put); + + // Get from memstore only + Get get = new Get(rowKey); + table.get(get); + + // Validate that the row cache is not populated + assertNull(rowCache.getRow(rowCacheKey)); + + // Flush memstore to HFile, then get again + admin.flush(tableName); + get = new Get(rowKey); + table.get(get); + + // Validate that the row cache is populated now + assertNotNull(rowCache.getRow(rowCacheKey)); + + // Put another qualifier. And now the cells are in both memstore and HFile. + put = new Put(rowKey); + put.addColumn(CF1, Q2, Bytes.toBytes(0L)); + table.put(put); + + // Validate that the row cache is invalidated + assertNull(rowCache.getRow(rowCacheKey)); + + // Get from memstore and HFile + get = new Get(rowKey); + table.get(get); + assertNotNull(rowCache.getRow(rowCacheKey)); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java new file mode 100644 index 000000000000..ea3ed188b758 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheCanCacheRow.java @@ -0,0 +1,266 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import java.io.IOException; +import java.util.function.Function; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.CompareOperator; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Consistency; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.IsolationLevel; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.filter.BinaryComparator; +import org.apache.hadoop.hbase.filter.RowFilter; +import org.apache.hadoop.hbase.security.visibility.Authorizations; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.Assert; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.Mockito; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestRowCacheCanCacheRow { + private static final byte[] CF1 = "cf1".getBytes(); + private static final byte[] CF2 = "cf2".getBytes(); + private static final byte[] ROW_KEY = "row".getBytes(); + private static final TableName TABLE_NAME = TableName.valueOf("test"); + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCacheCanCacheRow.class); + + @Test + public void testRowCacheEnabledByTable() { + Region region = Mockito.mock(Region.class); + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + TableDescriptor td; + + Get get = new Get(ROW_KEY); + get.addFamily(CF1); + + td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true).setColumnFamily(cfd) + .build(); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Configuration conf = HBaseConfiguration.create(); + conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled()); + + RowCache rowCache = new RowCache(conf); + Assert.assertTrue(rowCache.canCacheRow(get, region)); + + // Disable row cache, expect false + td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setColumnFamily(cfd) + .setRowCacheEnabled(false).build(); + Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled()); + Assert.assertFalse(rowCache.canCacheRow(get, region)); + } + + @Test + public void testRowCacheDisabledByConfig() { + Region region = Mockito.mock(Region.class); + Configuration conf = HBaseConfiguration.create(); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + TableDescriptor td; + + Get get = new Get(ROW_KEY); + get.addFamily(CF1); + + // Row cache enabled at table level, but disabled by row cache size 0, expect false + td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true).setColumnFamily(cfd) + .build(); + 
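+ // enabledByConf is captured once in the RowCache constructor from ROW_CACHE_SIZE_KEY, + // which stays at its default (0, i.e. row cache disabled) in this test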
Mockito.when(region.getTableDescriptor()).thenReturn(td); + + RowCache rowCache = new RowCache(conf); + Assert.assertFalse(rowCache.canCacheRow(get, region)); + } + + @Test + public void testRetrieveAllCells() { + Region region = Mockito.mock(Region.class); + ColumnFamilyDescriptor cfd1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + ColumnFamilyDescriptor cfd2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2).build(); + TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true) + .setColumnFamily(cfd1).setColumnFamily(cfd2).build(); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled()); + Configuration conf = HBaseConfiguration.create(); + conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + RowCache rowCache = new RowCache(conf); + + // Not all CFs, expect false + Get get = new Get(ROW_KEY); + get.addFamily(CF1); + Assert.assertFalse(rowCache.canCacheRow(get, region)); + + // All CFs, expect true + get.addFamily(CF2); + Assert.assertTrue(rowCache.canCacheRow(get, region)); + + // Not all qualifiers, expect false + get.addColumn(CF1, "q1".getBytes()); + Assert.assertFalse(rowCache.canCacheRow(get, region)); + } + + @Test + public void testTtl() { + ColumnFamilyDescriptor cfd1; + ColumnFamilyDescriptor cfd2; + TableDescriptor td; + Region region = Mockito.mock(Region.class); + Configuration conf = HBaseConfiguration.create(); + conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + RowCache rowCache = new RowCache(conf); + + Get get = new Get(ROW_KEY); + get.addFamily(CF1); + get.addFamily(CF2); + + // Ttl is set, expect false + cfd1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).setTimeToLive(1).build(); + cfd2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2).build(); + td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true) + .setColumnFamily(cfd1).setColumnFamily(cfd2).build(); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled()); + Assert.assertFalse(rowCache.canCacheRow(get, region)); + + // Ttl is not set, expect true + cfd1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true) + .setColumnFamily(cfd1).setColumnFamily(cfd2).build(); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled()); + Assert.assertTrue(rowCache.canCacheRow(get, region)); + } + + @Test + public void testFilter() { + testWith( + get -> get.setFilter(new RowFilter(CompareOperator.EQUAL, new BinaryComparator(ROW_KEY)))); + } + + @Test + public void testCacheBlock() { + testWith(get -> get.setCacheBlocks(false)); + } + + @Test + public void testAttribute() { + testWith(get -> get.setAttribute("test", "value".getBytes())); + } + + @Test + public void testCheckExistenceOnly() { + testWith(get -> get.setCheckExistenceOnly(true)); + } + + @Test + public void testColumnFamilyTimeRange() { + testWith(get -> get.setColumnFamilyTimeRange(CF1, 1000, 2000)); + } + + @Test + public void testConsistency() { + testWith(get -> get.setConsistency(Consistency.TIMELINE)); + } + + @Test + public void testAuthorizations() { + testWith(get -> get.setAuthorizations(new Authorizations("foo"))); + } + + @Test + 
public void testId() { + testWith(get -> get.setId("test")); + } + + @Test + public void testIsolationLevel() { + testWith(get -> get.setIsolationLevel(IsolationLevel.READ_UNCOMMITTED)); + } + + @Test + public void testMaxResultsPerColumnFamily() { + testWith(get -> get.setMaxResultsPerColumnFamily(2)); + } + + @Test + public void testReplicaId() { + testWith(get -> get.setReplicaId(1)); + } + + @Test + public void testRowOffsetPerColumnFamily() { + testWith(get -> get.setRowOffsetPerColumnFamily(1)); + } + + @Test + public void testTimeRange() { + testWith(get -> { + try { + return get.setTimeRange(1, 2); + } catch (IOException e) { + throw new RuntimeException(e); + } + }); + } + + @Test + public void testTimestamp() { + testWith(get -> get.setTimestamp(1)); + } + + private static void testWith(Function<Get, Get> func) { + Region region = Mockito.mock(Region.class); + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + TableDescriptor td = TableDescriptorBuilder.newBuilder(TABLE_NAME).setRowCacheEnabled(true) + .setColumnFamily(cfd).build(); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Mockito.when(region.isRowCacheEnabled()).thenReturn(td.getRowCacheEnabled()); + + Configuration conf = HBaseConfiguration.create(); + conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + RowCache rowCache = new RowCache(conf); + + Get get = new Get(ROW_KEY); + get.addFamily(CF1); + Assert.assertTrue(rowCache.canCacheRow(get, region)); + + // noinspection unused + var unused = func.apply(get); + + // expect false + Assert.assertFalse(rowCache.canCacheRow(get, region)); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java new file mode 100644 index 000000000000..02bba6fddf88 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheConfiguration.java @@ -0,0 +1,81 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + +import java.io.IOException; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.Test; +import org.junit.experimental.categories.Category; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestRowCacheConfiguration { + private static final byte[] CF1 = "cf1".getBytes(); + private static final TableName TABLE_NAME = TableName.valueOf("table"); + private final static HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + + @Test + public void testDetermineRowCacheEnabled() throws IOException { + Configuration conf = TEST_UTIL.getConfiguration(); + + HRegion region; + + // Set global config to false + conf.setBoolean(HConstants.ROW_CACHE_ENABLED_KEY, false); + + region = createRegion(null); + assertFalse(region.checkRowCacheConfig()); + + region = createRegion(false); + assertFalse(region.checkRowCacheConfig()); + + region = createRegion(true); + assertTrue(region.checkRowCacheConfig()); + + // Set global config to true + conf.setBoolean(HConstants.ROW_CACHE_ENABLED_KEY, true); + + region = createRegion(null); + assertTrue(region.checkRowCacheConfig()); + + region = createRegion(false); + assertFalse(region.checkRowCacheConfig()); + + region = createRegion(true); + assertTrue(region.checkRowCacheConfig()); + } + + private HRegion createRegion(Boolean rowCacheEnabled) throws IOException { + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + TableDescriptorBuilder tdb = TableDescriptorBuilder.newBuilder(TABLE_NAME).setColumnFamily(cfd); + if (rowCacheEnabled != null) { + tdb.setRowCacheEnabled(rowCacheEnabled); + } + return TEST_UTIL.createLocalHRegion(tdb.build(), "".getBytes(), "1".getBytes()); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java new file mode 100644 index 000000000000..4b3a1419f93a --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheEvictOnClose.java @@ -0,0 +1,129 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_EVICT_ON_CLOSE_KEY; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.SingleProcessHBaseCluster; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@Category({ RegionServerTests.class, MediumTests.class }) +@RunWith(Parameterized.class) +public class TestRowCacheEvictOnClose { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCacheEvictOnClose.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static final byte[] CF1 = Bytes.toBytes("cf1"); + private static final byte[] Q1 = Bytes.toBytes("q1"); + private static final byte[] Q2 = Bytes.toBytes("q2"); + + @Rule + public TestName testName = new TestName(); + + @Parameterized.Parameter + public boolean evictOnClose; + + @Parameterized.Parameters + public static List<Object[]> params() { + return Arrays.asList(new Object[][] { { true }, { false } }); + } + + @Test + public void testEvictOnClose() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + + // Enable the row cache and shrink the block cache so the combined caches stay within the heap limit + conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f); + conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f); + + // Set ROW_CACHE_EVICT_ON_CLOSE + conf.setBoolean(ROW_CACHE_EVICT_ON_CLOSE_KEY, evictOnClose); + + // Start cluster + SingleProcessHBaseCluster cluster = TEST_UTIL.startMiniCluster(); + cluster.waitForActiveAndReadyMaster(); + Admin admin = TEST_UTIL.getAdmin(); + + RowCache rowCache = cluster.getRegionServer(0).getRSRpcServices().getServer().getRowCache(); + + // Create table with row cache enabled + ColumnFamilyDescriptor cf1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); + TableName tableName = TableName.valueOf(testName.getMethodName().replaceAll("[\\[\\]]", "_")); + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName).setRowCacheEnabled(true) + .setColumnFamily(cf1).build(); + admin.createTable(td); + Table table = admin.getConnection().getTable(tableName); + + int numRows = 10; + + // Put rows + for (int i = 0; i < numRows; i++) { + byte[] rowKey = ("row" + i).getBytes(); + Put put = new Put(rowKey); + put.addColumn(CF1, Q1, Bytes.toBytes(0L)); + put.addColumn(CF1, Q2, "12".getBytes()); + table.put(put); + } + // Need to flush because the row cache is not populated when reading only from the memstore.
+ admin.flush(tableName); + + // Populate row caches + for (int i = 0; i < numRows; i++) { + byte[] rowKey = ("row" + i).getBytes(); + Get get = new Get(rowKey); + Result result = table.get(get); + assertArrayEquals(rowKey, result.getRow()); + assertArrayEquals(Bytes.toBytes(0L), result.getValue(CF1, Q1)); + assertArrayEquals("12".getBytes(), result.getValue(CF1, Q2)); + } + + // Verify the row cache holds one entry per row + assertEquals(numRows, rowCache.getCount()); + + // Disable table + admin.disableTable(tableName); + + // Verify the row cache is cleared on close only when evictOnClose is enabled + assertEquals(evictOnClose ? 0 : numRows, rowCache.getCount()); + + admin.deleteTable(tableName); + TEST_UTIL.shutdownMiniCluster(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java new file mode 100644 index 000000000000..a8c59dc6ccbc --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheHRegion.java @@ -0,0 +1,97 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.RegionInfoBuilder; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.CommonFSUtils; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.wal.WAL; +import org.apache.hadoop.hbase.wal.WALFactory; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +@Category({ RegionServerTests.class, MediumTests.class }) +public class TestRowCacheHRegion { + + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCacheHRegion.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + public static final byte[] CF = Bytes.toBytes("cf1"); + + @Rule + public TestName currentTest = new TestName(); + + @BeforeClass + public static void setupCluster() throws Exception { + TEST_UTIL.startMiniCluster(1); + } + + @AfterClass + public static void teardownCluster() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Test + public void testOpenHRegion() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + WALFactory walFactory = new WALFactory(conf, + ServerName.valueOf(currentTest.getMethodName(), 16010, EnvironmentEdgeManager.currentTime()) + .toString()); + WAL wal = walFactory.getWAL(null); + Path hbaseRootDir = CommonFSUtils.getRootDir(conf); + TableName tableName = TableName.valueOf(currentTest.getMethodName()); + RegionInfo hri = RegionInfoBuilder.newBuilder(tableName).build(); + TableDescriptor htd = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.of(CF)).build(); + HRegionServer regionServer = TEST_UTIL.getHBaseCluster().getRegionServer(0); + HRegion region = HRegion.openHRegion(conf, FileSystem.get(conf), hbaseRootDir, hri, htd, wal, + regionServer, null); + + // Verify that rowCacheSeqNum is initialized correctly + assertNotEquals(HConstants.NO_SEQNUM, region.getRowCacheSeqNum()); + assertEquals(region.getOpenSeqNum(), region.getRowCacheSeqNum()); + + region.close(); + walFactory.close(); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java new file mode 100644 index 000000000000..dafbfbdf6f8a --- /dev/null +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithBucketCacheAndDataBlockEncoding.java @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.regionserver; + +import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY; +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.io.ByteBuffAllocator; +import org.apache.hadoop.hbase.io.encoding.DataBlockEncoding; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.Before; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; + +@Category(MediumTests.class) +@RunWith(Parameterized.class) +public class TestRowCacheWithBucketCacheAndDataBlockEncoding { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCacheWithBucketCacheAndDataBlockEncoding.class); + + @Parameterized.Parameter + public boolean useBucketCache; + + @Parameterized.Parameters + public static List<Object[]> params() { + return Arrays.asList(new Object[][] { { true }, { false } }); + } + + @Rule + public TestName name = new TestName(); + + private static final byte[] ROW_KEY = Bytes.toBytes("checkRow"); + private static final byte[] CF = Bytes.toBytes("CF"); + private static final byte[] QUALIFIER = Bytes.toBytes("cq"); + private static final byte[] VALUE = Bytes.toBytes("checkValue"); + private static HBaseTestingUtil testingUtil; + private static Admin admin = null; + private static RowCache rowCache; + + @Before + public void setUp() throws Exception { + testingUtil = new HBaseTestingUtil(); + Configuration conf = testingUtil.getConfiguration(); +
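+ // Each parameterized run boots a fresh mini cluster; when useBucketCache is true, an off-heap bucket cache is configured underneath the row cache so both cache deployments are exercised.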
+ // Use bucket cache + if (useBucketCache) { + conf.setInt(ByteBuffAllocator.MIN_ALLOCATE_SIZE_KEY, 1); + conf.set(HConstants.BUCKET_CACHE_IOENGINE_KEY, "offheap"); + conf.setInt(HConstants.BUCKET_CACHE_SIZE_KEY, 64); + } + + // Use row cache + conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f); + conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f); + testingUtil.startMiniCluster(); + admin = testingUtil.getAdmin(); + + rowCache = testingUtil.getHBaseCluster().getRegionServer(0).getRowCache(); + } + + @After + public void tearDown() throws Exception { + testingUtil.shutdownMiniCluster(); + } + + @Test + public void testRowCacheNoEncode() throws Exception { + testRowCache(name.getMethodName(), DataBlockEncoding.NONE); + } + + @Test + public void testRowCacheEncode() throws Exception { + testRowCache(name.getMethodName(), DataBlockEncoding.FAST_DIFF); + } + + private void testRowCache(String methodName, DataBlockEncoding dbe) throws Exception { + TableName tableName = TableName.valueOf(methodName.replaceAll("[\\[\\]]", "_")); + try (Table testTable = createTable(tableName, dbe)) { + Put put = new Put(ROW_KEY); + put.addColumn(CF, QUALIFIER, VALUE); + testTable.put(put); + admin.flush(testTable.getName()); + + long countBase = rowCache.getCount(); + long hitCountBase = rowCache.getHitCount(); + + Result result; + + // The first get should miss the row cache and populate it + Get get = new Get(ROW_KEY); + result = testTable.get(get); + assertArrayEquals(ROW_KEY, result.getRow()); + assertArrayEquals(VALUE, result.getValue(CF, QUALIFIER)); + assertEquals(1, rowCache.getCount() - countBase); + assertEquals(0, rowCache.getHitCount() - hitCountBase); + + // The second get should hit the row cache + result = testTable.get(get); + assertArrayEquals(ROW_KEY, result.getRow()); + assertArrayEquals(VALUE, result.getValue(CF, QUALIFIER)); + assertEquals(1, rowCache.getCount() - countBase); + assertEquals(1, rowCache.getHitCount() - hitCountBase); + } + } + + private Table createTable(TableName tableName, DataBlockEncoding dbe) throws IOException { + TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName) + .setColumnFamily(ColumnFamilyDescriptorBuilder.newBuilder(CF).setBlocksize(100) + .setDataBlockEncoding(dbe).build()) + .setRowCacheEnabled(true).build(); + return testingUtil.createTable(td, null); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java new file mode 100644 index 000000000000..bfb8530d9f64 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java @@ -0,0 +1,397 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hbase.regionserver; + +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseConfiguration; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.KeyValueTestUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Append; +import org.apache.hadoop.hbase.client.CheckAndMutate; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptor; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Delete; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Increment; +import org.apache.hadoop.hbase.client.Mutation; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.RegionInfo; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.io.hfile.BlockCache; +import org.apache.hadoop.hbase.ipc.RpcCallContext; +import org.apache.hadoop.hbase.testclassification.RegionServerTests; +import org.apache.hadoop.hbase.testclassification.SmallTests; +import org.junit.ClassRule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.mockito.InOrder; +import org.mockito.Mockito; + +import org.apache.hbase.thirdparty.com.google.protobuf.ByteString; +import org.apache.hbase.thirdparty.com.google.protobuf.RpcController; +import org.apache.hbase.thirdparty.com.google.protobuf.ServiceException; + +import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos; +import org.apache.hadoop.hbase.shaded.protobuf.generated.HBaseProtos; + +@Category({ RegionServerTests.class, SmallTests.class }) +public class TestRowCacheWithMock { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCacheWithMock.class); + + @Test + public void testBarrier() throws IOException { + // Mocking dependencies to create rowCache instance + RegionInfo regionInfo = Mockito.mock(RegionInfo.class); + Mockito.when(regionInfo.getEncodedName()).thenReturn("region1"); + TableName tableName = TableName.valueOf("table1"); + Mockito.when(regionInfo.getTable()).thenReturn(tableName); + + List<HStore> stores = new ArrayList<>(); + HStore hStore = Mockito.mock(HStore.class); + Mockito.when(hStore.getStorefilesCount()).thenReturn(2); + stores.add(hStore); + + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder("CF1".getBytes()).build(); + TableDescriptor td = Mockito.mock(TableDescriptor.class); + Mockito.when(td.getColumnFamilies()).thenReturn(new ColumnFamilyDescriptor[] { cfd }); + + byte[] rowKey = "row".getBytes(); + Get get = new Get(rowKey); + Scan scan = new Scan(get); + List<Cell> results = new ArrayList<>(); + + RegionScannerImpl regionScanner = Mockito.mock(RegionScannerImpl.class); + + RpcCallContext context = Mockito.mock(RpcCallContext.class); + Mockito.when(context.getBlockBytesScanned()).thenReturn(1L); + + Configuration conf = HBaseConfiguration.create(); + conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f); + + RowCache rowCache = new RowCache(conf); + + HRegion region = Mockito.mock(HRegion.class); +
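+ // Wire the real setRowCache() and getScannerWithResults() implementations into the mocked region so the read path below goes through the RowCache under test.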
Mockito.doCallRealMethod().when(region).setRowCache(Mockito.any()); + region.setRowCache(rowCache); + Mockito.when(region.getRegionInfo()).thenReturn(regionInfo); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Mockito.when(region.getStores()).thenReturn(stores); + Mockito.when(region.getScanner(scan)).thenReturn(regionScanner); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + Mockito.when(region.isRowCacheEnabled()).thenReturn(true); + Mockito.when(region.getScannerWithResults(get, scan, results, context)).thenCallRealMethod(); + + RowCacheKey key = new RowCacheKey(region, rowKey); + results.add(KeyValueTestUtil.create("row", "CF", "q1", 1, "v1")); + + // Verify that the row cache is populated while no row-level barrier exists + region.getScannerWithResults(get, scan, results, context); + assertNotNull(rowCache.getRow(key)); + assertNull(rowCache.getRowLevelBarrier(key)); + + // Evict the row cache + rowCache.evictRow(key); + assertNull(rowCache.getRow(key)); + + // Create a row-level barrier for the row key + rowCache.createRowLevelBarrier(key); + assertEquals(1, rowCache.getRowLevelBarrier(key).get()); + + // Verify that the row cache is not populated while the row-level barrier exists + region.getScannerWithResults(get, scan, results, context); + assertNull(rowCache.getRow(key)); + + // Remove the row-level barrier + rowCache.removeRowLevelBarrier(key); + assertNull(rowCache.getRowLevelBarrier(key)); + + // Verify that the row cache is populated while no region-level barrier exists + region.getScannerWithResults(get, scan, results, context); + assertNotNull(rowCache.getRow(key)); + assertNull(rowCache.getRegionLevelBarrier(region)); + + // Evict the row cache + rowCache.evictRow(key); + assertNull(rowCache.getRow(key)); + + // Create a region-level barrier for the region + rowCache.createRegionLevelBarrier(region); + assertEquals(1, rowCache.getRegionLevelBarrier(region).get()); + + // Verify that the row cache is not populated while the region-level barrier exists + region.getScannerWithResults(get, scan, results, context); + assertNull(rowCache.getRow(key)); + + // Remove the region-level barrier + rowCache.removeTableLevelBarrier(region); + assertNull(rowCache.getRegionLevelBarrier(region)); + } + + @Test + public void testMutate() throws IOException, ServiceException { + // Mocking RowCache and its dependencies + TableDescriptor tableDescriptor = Mockito.mock(TableDescriptor.class); + + RegionInfo regionInfo = Mockito.mock(RegionInfo.class); + Mockito.when(regionInfo.getEncodedName()).thenReturn("region1"); + + RowCache rowCache = Mockito.mock(RowCache.class); + + RegionServerServices rss = Mockito.mock(RegionServerServices.class); + Mockito.when(rss.getRowCache()).thenReturn(rowCache); + + HRegion region = Mockito.mock(HRegion.class); + Mockito.doCallRealMethod().when(region).setRowCache(Mockito.any()); + region.setRowCache(rowCache); + Mockito.when(region.getTableDescriptor()).thenReturn(tableDescriptor); + Mockito.when(region.getRegionInfo()).thenReturn(regionInfo); + Mockito.when(region.getBlockCache()).thenReturn(Mockito.mock(BlockCache.class)); + Mockito.when(region.isRowCacheEnabled()).thenReturn(true); + Mockito.when(region.getRegionServerServices()).thenReturn(rss); + + RSRpcServices rsRpcServices = Mockito.mock(RSRpcServices.class); + Mockito.when(rsRpcServices.getRegion(Mockito.any())).thenReturn(region); + + RpcController rpcController = Mockito.mock(RpcController.class); + + CheckAndMutate checkAndMutate = CheckAndMutate.newBuilder("row".getBytes())
.ifEquals("CF".getBytes(), "q1".getBytes(), "v1".getBytes()).build(new Put("row".getBytes())); + + Put put1 = new Put("row1".getBytes()); + put1.addColumn("CF".getBytes(), "q1".getBytes(), "v1".getBytes()); + Put put2 = new Put("row1".getBytes()); + put2.addColumn("CF".getBytes(), "q1".getBytes(), "v1".getBytes()); + List<Mutation> mutations = new ArrayList<>(); + mutations.add(put1); + mutations.add(put2); + + Delete del = new Delete("row1".getBytes()); + Append append = new Append("row1".getBytes()); + append.addColumn("CF".getBytes(), "q1".getBytes(), "v1".getBytes()); + Increment increment = new Increment("row1".getBytes()); + increment.addColumn("CF".getBytes(), "q1".getBytes(), 1L); + + Mutation[] mutationArray = new Mutation[mutations.size()]; + mutations.toArray(mutationArray); + + // rowCache.mutateWithRowCacheBarrier must run real code so internal calls are recorded + Mockito.doCallRealMethod().when(rowCache).mutateWithRowCacheBarrier(Mockito.any(HRegion.class), + Mockito.any(byte[].class), Mockito.any()); + Mockito.doCallRealMethod().when(rowCache).mutateWithRowCacheBarrier(Mockito.any(HRegion.class), + Mockito.anyList(), Mockito.any()); + + InOrder inOrder; + + // Put + Mockito.doAnswer(invocation -> { + Put arg = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null); + return null; + }).when(region).put(put1); + Mockito.clearInvocations(rowCache); + inOrder = Mockito.inOrder(rowCache); + region.put(put1); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // Delete + Mockito.doAnswer(invocation -> { + Delete arg = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null); + return null; + }).when(region).delete(del); + inOrder = Mockito.inOrder(rowCache); + Mockito.clearInvocations(rowCache); + region.delete(del); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // Append + Mockito.doAnswer(invocation -> { + Append arg = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null); + return null; + }).when(region).append(append); + inOrder = Mockito.inOrder(rowCache); + Mockito.clearInvocations(rowCache); + region.append(append); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // Increment + Mockito.doAnswer(invocation -> { + Increment arg = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, arg.getRow(), () -> null); + return null; + }).when(region).increment(increment); + inOrder = Mockito.inOrder(rowCache); + Mockito.clearInvocations(rowCache); + region.increment(increment); + // Verify the sequence of method calls + inOrder.verify(rowCache,
Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // CheckAndMutate + Mockito.doAnswer(invocation -> { + CheckAndMutate c = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, c.getRow(), () -> null); + return null; + }).when(region).checkAndMutate(Mockito.any(CheckAndMutate.class), Mockito.anyLong(), + Mockito.anyLong()); + Mockito.clearInvocations(rowCache); + inOrder = Mockito.inOrder(rowCache); + region.checkAndMutate(checkAndMutate, 0, 0); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // RowMutations + Mockito.doAnswer(invocation -> { + List<Mutation> muts = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, muts, () -> null); + return null; + }).when(region).checkAndMutate(Mockito.anyList(), Mockito.any(CheckAndMutate.class), + Mockito.anyLong(), Mockito.anyLong()); + Mockito.clearInvocations(rowCache); + inOrder = Mockito.inOrder(rowCache); + region.checkAndMutate(mutations, checkAndMutate, 0, 0); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // Batch + Mockito.doAnswer(invocation -> { + Mutation[] muts = invocation.getArgument(0); + rowCache.mutateWithRowCacheBarrier(region, Arrays.asList(muts), () -> null); + return null; + }).when(region).batchMutate(Mockito.any(Mutation[].class), Mockito.anyBoolean(), + Mockito.anyLong(), Mockito.anyLong()); + Mockito.clearInvocations(rowCache); + inOrder = Mockito.inOrder(rowCache); + region.batchMutate(mutationArray, true, 0, 0); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRowLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).evictRow(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).execute(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRowLevelBarrier(Mockito.any()); + + // Bulkload + HBaseProtos.RegionSpecifier regionSpecifier = HBaseProtos.RegionSpecifier.newBuilder() + .setType(HBaseProtos.RegionSpecifier.RegionSpecifierType.REGION_NAME) + .setValue(ByteString.copyFrom("region".getBytes())).build(); + ClientProtos.BulkLoadHFileRequest bulkLoadRequest = + ClientProtos.BulkLoadHFileRequest.newBuilder().setRegion(regionSpecifier).build(); + Mockito.doCallRealMethod().when(rsRpcServices).bulkLoadHFile(rpcController, bulkLoadRequest); + Mockito.clearInvocations(rowCache); + inOrder = Mockito.inOrder(rowCache); + rsRpcServices.bulkLoadHFile(rpcController, bulkLoadRequest); + // Verify the sequence of method calls + inOrder.verify(rowCache, Mockito.times(1)).createRegionLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).increaseRowCacheSeqNum(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeTableLevelBarrier(Mockito.any()); + } + + @Test + public void
testCaching() throws IOException { + // Mocking dependencies to create RowCache instance + RegionInfo regionInfo = Mockito.mock(RegionInfo.class); + Mockito.when(regionInfo.getEncodedName()).thenReturn("region1"); + TableName tableName = TableName.valueOf("table1"); + Mockito.when(regionInfo.getTable()).thenReturn(tableName); + + List<HStore> stores = new ArrayList<>(); + HStore hStore = Mockito.mock(HStore.class); + Mockito.when(hStore.getStorefilesCount()).thenReturn(2); + stores.add(hStore); + + ColumnFamilyDescriptor cfd = ColumnFamilyDescriptorBuilder.newBuilder("CF1".getBytes()).build(); + TableDescriptor td = Mockito.mock(TableDescriptor.class); + Mockito.when(td.getColumnFamilies()).thenReturn(new ColumnFamilyDescriptor[] { cfd }); + + RpcCallContext context = Mockito.mock(RpcCallContext.class); + Mockito.when(context.getBlockBytesScanned()).thenReturn(1L); + + byte[] rowKey = "row".getBytes(); + RegionScannerImpl regionScanner = Mockito.mock(RegionScannerImpl.class); + + Get get = new Get(rowKey); + Scan scan = new Scan(get); + + Configuration conf = HBaseConfiguration.create(); + conf.setFloat(HConstants.ROW_CACHE_SIZE_KEY, 0.01f); + RowCache rowCache = new RowCache(conf); + + HRegion region = Mockito.mock(HRegion.class); + Mockito.doCallRealMethod().when(region).setRowCache(Mockito.any()); + region.setRowCache(rowCache); + Mockito.when(region.getRegionInfo()).thenReturn(regionInfo); + Mockito.when(region.getTableDescriptor()).thenReturn(td); + Mockito.when(region.getStores()).thenReturn(stores); + Mockito.when(region.getScanner(scan)).thenReturn(regionScanner); + Mockito.when(region.getReadOnlyConfiguration()).thenReturn(conf); + Mockito.when(region.isRowCacheEnabled()).thenReturn(true); + Mockito.when(region.getScannerWithResults(Mockito.any(Get.class), Mockito.any(Scan.class), + Mockito.anyList(), Mockito.any())).thenCallRealMethod(); + + RowCacheKey key = new RowCacheKey(region, rowKey); + List<Cell> results = new ArrayList<>(); + results.add(KeyValueTestUtil.create("row", "CF", "q1", 1, "v1")); + + // Verify that the row cache is not populated when caching=false + // This must run first so that it does not populate the row cache + get.setCacheBlocks(false); + region.getScannerWithResults(get, scan, results, context); + assertNull(rowCache.getRow(key, true)); + assertNull(rowCache.getRow(key, false)); + + // Verify that the row cache is populated when caching=true + get.setCacheBlocks(true); + region.getScannerWithResults(get, scan, results, context); + assertNotNull(rowCache.getRow(key, true)); + assertNull(rowCache.getRow(key, false)); + } +} diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java new file mode 100644 index 000000000000..c5a62935e5e6 --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/tool/TestRowCacheBulkLoadHFiles.java @@ -0,0 +1,199 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License.
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.tool; + +import static org.apache.hadoop.hbase.HConstants.HFILE_BLOCK_CACHE_SIZE_KEY; +import static org.apache.hadoop.hbase.HConstants.ROW_CACHE_SIZE_KEY; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertNotEquals; +import static org.junit.Assert.assertNotNull; +import static org.junit.Assert.assertNull; + +import java.io.IOException; +import java.util.Comparator; +import java.util.stream.IntStream; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HBaseClassTestRule; +import org.apache.hadoop.hbase.HBaseTestingUtil; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.Admin; +import org.apache.hadoop.hbase.client.ColumnFamilyDescriptorBuilder; +import org.apache.hadoop.hbase.client.Get; +import org.apache.hadoop.hbase.client.Put; +import org.apache.hadoop.hbase.client.Result; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.client.TableDescriptor; +import org.apache.hadoop.hbase.client.TableDescriptorBuilder; +import org.apache.hadoop.hbase.regionserver.HRegion; +import org.apache.hadoop.hbase.regionserver.RowCache; +import org.apache.hadoop.hbase.regionserver.RowCacheKey; +import org.apache.hadoop.hbase.regionserver.TestHRegionServerBulkLoad; +import org.apache.hadoop.hbase.testclassification.MediumTests; +import org.apache.hadoop.hbase.testclassification.MiscTests; +import org.apache.hadoop.hbase.util.Bytes; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.ClassRule; +import org.junit.Rule; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.rules.TestName; + +@Category({ MiscTests.class, MediumTests.class }) +public class TestRowCacheBulkLoadHFiles { + @ClassRule + public static final HBaseClassTestRule CLASS_RULE = + HBaseClassTestRule.forClass(TestRowCacheBulkLoadHFiles.class); + + private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); + private static Admin admin; + + final static int NUM_CFS = 2; + final static byte[] QUAL = Bytes.toBytes("qual"); + final static int ROWCOUNT = 10; + + private TableName tableName; + private Table table; + private HRegion[] regions; + + @Rule + public TestName testName = new TestName(); + + static String family(int i) { + return String.format("family_%04d", i); + } + + public static void buildHFiles(FileSystem fs, Path dir) throws IOException { + byte[] val = "value".getBytes(); + for (int i = 0; i < NUM_CFS; i++) { + Path testIn = new Path(dir, family(i)); + + TestHRegionServerBulkLoad.createHFile(fs, new Path(testIn, "hfile_" + i), + Bytes.toBytes(family(i)), QUAL, val, ROWCOUNT); + } + } + + private TableDescriptor createTableDesc(TableName name) { + TableDescriptorBuilder builder = + TableDescriptorBuilder.newBuilder(name).setRowCacheEnabled(true); + IntStream.range(0, NUM_CFS).mapToObj(i -> ColumnFamilyDescriptorBuilder.of(family(i))) + 
.forEachOrdered(builder::setColumnFamily); + return builder.build(); + } + + private Path buildBulkFiles(TableName table) throws Exception { + Path dir = TEST_UTIL.getDataTestDirOnTestFS(table.getNameAsString()); + Path bulk1 = new Path(dir, table.getNameAsString()); + FileSystem fs = TEST_UTIL.getTestFileSystem(); + buildHFiles(fs, bulk1); + return bulk1; + } + + @BeforeClass + public static void setupCluster() throws Exception { + Configuration conf = TEST_UTIL.getConfiguration(); + + // Enable the row cache and reduce the block cache size so the combined caches stay within the 80% heap limit + conf.setFloat(ROW_CACHE_SIZE_KEY, 0.01f); + conf.setFloat(HFILE_BLOCK_CACHE_SIZE_KEY, 0.39f); + + TEST_UTIL.startMiniCluster(1); + admin = TEST_UTIL.getAdmin(); + } + + @AfterClass + public static void teardownCluster() throws Exception { + TEST_UTIL.shutdownMiniCluster(); + } + + @Before + public void before() throws Exception { + tableName = TableName.valueOf(testName.getMethodName()); + // Split the table into 2 regions + byte[][] splitKeys = new byte[][] { TestHRegionServerBulkLoad.rowkey(ROWCOUNT) }; + admin.createTable(createTableDesc(tableName), splitKeys); + table = TEST_UTIL.getConnection().getTable(tableName); + // Sorted by region name + regions = TEST_UTIL.getRSForFirstRegionInTable(tableName).getRegions().stream() + .filter(r -> r.getRegionInfo().getTable().equals(tableName)) + .sorted(Comparator.comparing(r -> r.getRegionInfo().getRegionNameAsString())) + .toArray(HRegion[]::new); + } + + @After + public void after() throws Exception { + if (admin.tableExists(tableName)) { + admin.disableTable(tableName); + admin.deleteTable(tableName); + } + } + + @Test + public void testRowCache() throws Exception { + RowCache rowCache = + TEST_UTIL.getHBaseCluster().getRegionServer(0).getRSRpcServices().getServer().getRowCache(); + + // A row in the region to be bulk-loaded + byte[] rowKeyRegion0 = TestHRegionServerBulkLoad.rowkey(0); + // A row in the region not to be bulk-loaded + byte[] rowKeyRegion1 = TestHRegionServerBulkLoad.rowkey(ROWCOUNT); + + // Put a row into each region; the gets below populate the row cache + Put put0 = new Put(rowKeyRegion0); + put0.addColumn(family(0).getBytes(), "q1".getBytes(), "value".getBytes()); + table.put(put0); + Put put1 = new Put(rowKeyRegion1); + put1.addColumn(family(0).getBytes(), "q1".getBytes(), "value".getBytes()); + table.put(put1); + admin.flush(tableName); + + // Ensure each region has a cached row + Get get0 = new Get(rowKeyRegion0); + Result result0 = table.get(get0); + assertNotNull(result0); + RowCacheKey keyPrev0 = new RowCacheKey(regions[0], get0.getRow()); + assertNotNull(rowCache.getRow(keyPrev0)); + Get get1 = new Get(rowKeyRegion1); + Result result1 = table.get(get1); + assertNotNull(result1); + RowCacheKey keyPrev1 = new RowCacheKey(regions[1], get1.getRow()); + assertNotNull(rowCache.getRow(keyPrev1)); + + // Bulk load into region0 only + Configuration conf = new Configuration(TEST_UTIL.getConfiguration()); + BulkLoadHFilesTool loader = new BulkLoadHFilesTool(conf); + Path dir = buildBulkFiles(tableName); + loader.bulkLoad(tableName, dir); + + // Ensure the row cache key for region0 changes after the bulk load, so no entry exists under the new key + RowCacheKey keyCur0 = new RowCacheKey(regions[0], get0.getRow()); + assertNotEquals(keyPrev0, keyCur0); + assertNull(rowCache.getRow(keyCur0)); + // Ensure the row cache for keyPrev0 still exists, but it is not used anymore.
+ assertNotNull(rowCache.getRow(keyPrev0)); + + // Ensure the row cache for region1 is not affected + RowCacheKey keyCur1 = new RowCacheKey(regions[1], get1.getRow()); + assertEquals(keyPrev1, keyCur1); + assertNotNull(rowCache.getRow(keyCur1)); + } +} From cd93c2fb0f4e8d0002cf3bbe77d4b28bf55136ca Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Mon, 6 Apr 2026 09:11:22 +0900 Subject: [PATCH 02/10] Add GitHub Actions CI workflow files from master --- .github/workflows/yetus-general-check.yml | 138 ++++++++++++++++++ .../yetus-jdk17-hadoop3-compile-check.yml | 107 ++++++++++++++ .../yetus-jdk17-hadoop3-unit-check.yml | 129 ++++++++++++++++ 3 files changed, 374 insertions(+) create mode 100644 .github/workflows/yetus-general-check.yml create mode 100644 .github/workflows/yetus-jdk17-hadoop3-compile-check.yml create mode 100644 .github/workflows/yetus-jdk17-hadoop3-unit-check.yml diff --git a/.github/workflows/yetus-general-check.yml b/.github/workflows/yetus-general-check.yml new file mode 100644 index 000000000000..79397d6a905f --- /dev/null +++ b/.github/workflows/yetus-general-check.yml @@ -0,0 +1,138 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
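+# This workflow drives the Apache Yetus precommit (the same dev-support script used by Jenkins) against each pull request and publishes the report as a job summary and build artifact.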
+ +# yamllint disable rule:line-length +--- +name: Yetus General Check + +"on": + pull_request: + types: [opened, synchronize, reopened] + +permissions: {} + +jobs: + general-check: + runs-on: ubuntu-latest + timeout-minutes: 600 + permissions: + contents: read + statuses: write + + env: + YETUS_VERSION: '0.15.0' + + steps: + - name: Checkout HBase + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + path: src + fetch-depth: 0 + persist-credentials: false + + - name: Set up JDK 17 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '17' + distribution: 'temurin' + + - name: Maven cache + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + path: ~/.m2 + key: hbase-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: | + hbase-m2- + + - name: Download Yetus + run: | + mkdir -p yetus + cd yetus + bash "${{ github.workspace }}/src/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \ + --keys 'https://downloads.apache.org/yetus/KEYS' \ + --verify-tar-gz \ + ./apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz \ + yetus/${{ env.YETUS_VERSION }}/apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + tar --strip-components=1 -xzf apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + rm apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + + - name: Run Yetus General Check + env: + ARCHIVE_PATTERN_LIST: "TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump" + DOCKERFILE: "${{ github.workspace }}/src/dev-support/docker/Dockerfile" + GITHUB_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USER: ${{ github.actor }} + PATCHDIR: "${{ github.workspace }}/yetus-general-check/output" + PLUGINS: "all,-javadoc,-jira,-shadedjars,-unit" + SET_JAVA_HOME: "/usr/lib/jvm/java-17" + SOURCEDIR: "${{ github.workspace }}/src" + TESTS_FILTER: "checkstyle,javac,pylint,shellcheck,shelldocs,blanks,perlcritic,ruby-lint,rubocop" + YETUSDIR: "${{ github.workspace }}/yetus" + AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc" + BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt" + BLANKS_TABS_IGNORE_FILE: "dev-support/blanks-tabs-ignore.txt" + EXCLUDE_TESTS_URL: "https://ci-hbase.apache.org/job/HBase-Find-Flaky-Tests/job/${{ github.base_ref }}/lastSuccessfulBuild/artifact/output/excludes" + BUILD_THREAD: "4" + SUREFIRE_FIRST_PART_FORK_COUNT: "1.0C" + SUREFIRE_SECOND_PART_FORK_COUNT: "0.5C" + BRANCH_NAME: "${{ github.base_ref }}" + DEBUG: 'true' + run: | + cd "${{ github.workspace }}" + bash src/dev-support/jenkins_precommit_github_yetus.sh + + - name: Publish Job Summary + if: always() + run: | + cd "${{ github.workspace }}" + python3 src/dev-support/yetus_console_to_md.py yetus-general-check/output >> $GITHUB_STEP_SUMMARY + + - name: Publish Test Results + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: yetus-general-check-output + path: ${{ github.workspace }}/yetus-general-check/output + retention-days: 7 + + zizmor: + runs-on: ubuntu-latest + timeout-minutes: 5 + permissions: + contents: read + + steps: + - name: Check for workflow changes + id: changes + env: + GH_TOKEN: ${{ github.token }} + run: | + if gh pr diff "${{ github.event.pull_request.number }}" --repo "${{ github.repository }}" --name-only | grep -q '^\.github/workflows/'; then + echo "changed=true" >> "$GITHUB_OUTPUT" + else + echo "changed=false" >> "$GITHUB_OUTPUT" + fi + + - name: Checkout HBase + if: steps.changes.outputs.changed == 'true' + uses: 
actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + persist-credentials: false + + - name: Run zizmor + if: steps.changes.outputs.changed == 'true' + run: pipx run zizmor --min-severity=medium .github/workflows/ diff --git a/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml new file mode 100644 index 000000000000..46d3a973d017 --- /dev/null +++ b/.github/workflows/yetus-jdk17-hadoop3-compile-check.yml @@ -0,0 +1,107 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# yamllint disable rule:line-length +--- +name: Yetus JDK17 Hadoop3 Compile Check + +"on": + pull_request: + types: [opened, synchronize, reopened] + +permissions: + contents: read + statuses: write + +jobs: + jdk17-hadoop3-compile-check: + runs-on: ubuntu-latest + timeout-minutes: 60 + + env: + YETUS_VERSION: '0.15.0' + + steps: + - name: Checkout HBase + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + path: src + fetch-depth: 0 + persist-credentials: false + + - name: Set up JDK 17 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '17' + distribution: 'temurin' + + - name: Maven cache + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + path: ~/.m2 + key: hbase-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: | + hbase-m2- + + - name: Download Yetus + run: | + mkdir -p yetus + cd yetus + bash "${{ github.workspace }}/src/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \ + --keys 'https://downloads.apache.org/yetus/KEYS' \ + --verify-tar-gz \ + ./apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz \ + yetus/${{ env.YETUS_VERSION }}/apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + tar --strip-components=1 -xzf apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + rm apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + + - name: Run Yetus JDK17 Hadoop3 Compile Check + env: + ARCHIVE_PATTERN_LIST: "TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump" + DOCKERFILE: "${{ github.workspace }}/src/dev-support/docker/Dockerfile" + GITHUB_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USER: ${{ github.actor }} + PATCHDIR: "${{ github.workspace }}/yetus-jdk17-hadoop3-compile-check/output" + PLUGINS: "compile,github,htmlout,javac,javadoc,maven,mvninstall,shadedjars" + SET_JAVA_HOME: "/usr/lib/jvm/java-17" + SOURCEDIR: "${{ github.workspace }}/src" + TESTS_FILTER: "javac,javadoc" + YETUSDIR: "${{ github.workspace }}/yetus" + AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc" + BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt" + BLANKS_TABS_IGNORE_FILE: "dev-support/blanks-tabs-ignore.txt" + BUILD_THREAD: "4" + BRANCH_NAME: "${{ github.base_ref }}" + SKIP_ERRORPRONE: 'true' + 
DEBUG: 'true' + run: | + cd "${{ github.workspace }}" + bash src/dev-support/jenkins_precommit_github_yetus.sh + + - name: Publish Job Summary + if: always() + run: | + cd "${{ github.workspace }}" + python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-compile-check/output >> $GITHUB_STEP_SUMMARY + + - name: Publish Results + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: yetus-jdk17-hadoop3-compile-check-output + path: ${{ github.workspace }}/yetus-jdk17-hadoop3-compile-check/output + retention-days: 7 diff --git a/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml new file mode 100644 index 000000000000..93ae44c247a3 --- /dev/null +++ b/.github/workflows/yetus-jdk17-hadoop3-unit-check.yml @@ -0,0 +1,129 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# yamllint disable rule:line-length +--- +name: Yetus JDK17 Hadoop3 Unit Check + +"on": + pull_request: + types: [opened, synchronize, reopened] + +permissions: + contents: read + statuses: write + +jobs: + jdk17-hadoop3-unit-check: + runs-on: ubuntu-latest + timeout-minutes: 360 + + strategy: + fail-fast: false + matrix: + include: + - name: "small" + test_profile: "runSmallTests" + - name: "medium" + test_profile: "runMediumTests" + # Large tests split alphabetically by class name (after "Test" prefix) + # Wave 1: Test[A-H]*, Wave 2: Test[I-R]*, Wave 3: Test[S-Z]* + - name: "large-wave-1" + test_profile: "runLargeTests-wave1" + - name: "large-wave-2" + test_profile: "runLargeTests-wave2" + - name: "large-wave-3" + test_profile: "runLargeTests-wave3" + + name: ${{ matrix.name }} + + env: + YETUS_VERSION: '0.15.0' + + steps: + - name: Checkout HBase + uses: actions/checkout@de0fac2e4500dabe0009e67214ff5f5447ce83dd # v6.0.2 + with: + path: src + fetch-depth: 0 + persist-credentials: false + + - name: Set up JDK 17 + uses: actions/setup-java@be666c2fcd27ec809703dec50e508c2fdc7f6654 # v5.2.0 + with: + java-version: '17' + distribution: 'temurin' + + - name: Maven cache + uses: actions/cache@cdf6c1fa76f9f475f3d7449005a359c84ca0f306 # v5.0.3 + with: + path: ~/.m2 + key: hbase-m2-${{ hashFiles('**/pom.xml') }} + restore-keys: | + hbase-m2- + + - name: Download Yetus + run: | + mkdir -p yetus + cd yetus + bash "${{ github.workspace }}/src/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \ + --keys 'https://downloads.apache.org/yetus/KEYS' \ + --verify-tar-gz \ + ./apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz \ + yetus/${{ env.YETUS_VERSION }}/apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + tar --strip-components=1 -xzf apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + rm apache-yetus-${{ env.YETUS_VERSION }}-bin.tar.gz + + - name: Run Yetus JDK17 Hadoop3 
Unit Check + env: + ARCHIVE_PATTERN_LIST: "TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump" + DOCKERFILE: "${{ github.workspace }}/src/dev-support/docker/Dockerfile" + GITHUB_PASSWORD: ${{ secrets.GITHUB_TOKEN }} + GITHUB_USER: ${{ github.actor }} + PATCHDIR: "${{ github.workspace }}/yetus-jdk17-hadoop3-unit-check/output" + PLUGINS: "github,htmlout,maven,unit" + SET_JAVA_HOME: "/usr/lib/jvm/java-17" + SOURCEDIR: "${{ github.workspace }}/src" + YETUSDIR: "${{ github.workspace }}/yetus" + AUTHOR_IGNORE_LIST: "src/main/asciidoc/_chapters/developer.adoc" + BLANKS_EOL_IGNORE_FILE: "dev-support/blanks-eol-ignore.txt" + BLANKS_TABS_IGNORE_FILE: "dev-support/blanks-tabs-ignore.txt" + EXCLUDE_TESTS_URL: "https://ci-hbase.apache.org/job/HBase-Find-Flaky-Tests/job/${{ github.base_ref }}/lastSuccessfulBuild/artifact/output/excludes" + BUILD_THREAD: "4" + SUREFIRE_FIRST_PART_FORK_COUNT: "1.0C" + SUREFIRE_SECOND_PART_FORK_COUNT: "0.5C" + BRANCH_NAME: "${{ github.base_ref }}" + SKIP_ERRORPRONE: 'true' + DEBUG: 'true' + TEST_PROFILE: ${{ matrix.test_profile }} + run: | + cd "${{ github.workspace }}" + bash src/dev-support/jenkins_precommit_github_yetus.sh + + - name: Publish Job Summary + if: always() + run: | + cd "${{ github.workspace }}" + python3 src/dev-support/yetus_console_to_md.py yetus-jdk17-hadoop3-unit-check/output >> $GITHUB_STEP_SUMMARY + + - name: Publish Test Results + if: always() + uses: actions/upload-artifact@bbbca2ddaa5d8feaa63e36b76fdaad77386f024f # v7.0.0 + with: + name: yetus-jdk17-hadoop3-unit-check-${{ matrix.name }} + path: ${{ github.workspace }}/yetus-jdk17-hadoop3-unit-check/output + retention-days: 7 From 95de81b7d1bb3c58e3d1783cbe6c4551cfbda2b3 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Mon, 6 Apr 2026 09:53:44 +0900 Subject: [PATCH 03/10] Rename removeTableLevelBarrier to removeRegionLevelBarrier --- .../org/apache/hadoop/hbase/regionserver/RSRpcServices.java | 2 +- .../java/org/apache/hadoop/hbase/regionserver/RowCache.java | 2 +- .../hadoop/hbase/regionserver/TestRowCacheWithMock.java | 4 ++-- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java index 7a21ab8a5504..d391cc17f5aa 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RSRpcServices.java @@ -2360,7 +2360,7 @@ public BulkLoadHFileResponse bulkLoadHFile(final RpcController controller, return bulkLoadHFileInternal(request); } finally { // The row cache for the region has been enabled again - rowCache.removeTableLevelBarrier(region); + rowCache.removeRegionLevelBarrier(region); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java index 6a02618d7097..53c29269b4e4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java @@ -256,7 +256,7 @@ void increaseRowCacheSeqNum(HRegion region) { region.increaseRowCacheSeqNum(); } - void removeTableLevelBarrier(HRegion region) { + void removeRegionLevelBarrier(HRegion region) { regionLevelBarrierMap.computeIfPresent(region, (k, counter) -> { int remaining = counter.decrementAndGet(); return (remaining <= 0) ? 
null : counter; diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java index bfb8530d9f64..f3cde3792331 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCacheWithMock.java @@ -152,7 +152,7 @@ public void testBarrier() throws IOException { assertNull(rowCache.getRow(key)); // Remove the table level barrier - rowCache.removeTableLevelBarrier(region); + rowCache.removeRegionLevelBarrier(region); assertNull(rowCache.getRegionLevelBarrier(region)); } @@ -332,7 +332,7 @@ public void testMutate() throws IOException, ServiceException { // Verify the sequence of method calls inOrder.verify(rowCache, Mockito.times(1)).createRegionLevelBarrier(Mockito.any()); inOrder.verify(rowCache, Mockito.times(1)).increaseRowCacheSeqNum(Mockito.any()); - inOrder.verify(rowCache, Mockito.times(1)).removeTableLevelBarrier(Mockito.any()); + inOrder.verify(rowCache, Mockito.times(1)).removeRegionLevelBarrier(Mockito.any()); } @Test From 782dd75bbf3b846820fe2571819db73733d96e7b Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Mon, 6 Apr 2026 09:55:17 +0900 Subject: [PATCH 04/10] Move rowCache field next to MobFileCache and extract initRowCache() method --- .../regionserver/MetricsRegionServerWrapperImpl.java | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java index b4dabf7fb3bf..ef80e2ee5803 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/MetricsRegionServerWrapperImpl.java @@ -68,8 +68,8 @@ class MetricsRegionServerWrapperImpl implements MetricsRegionServerWrapper { private BlockCache l1Cache = null; private BlockCache l2Cache = null; private MobFileCache mobFileCache; + private RowCache rowCache; private CacheStats cacheStats; - private final RowCache rowCache; private CacheStats l1Stats = null; private CacheStats l2Stats = null; private volatile long numWALFiles = 0; @@ -100,8 +100,7 @@ public MetricsRegionServerWrapperImpl(final HRegionServer regionServer) { this.regionServer = regionServer; initBlockCache(); initMobFileCache(); - RSRpcServices rsRpcServices = this.regionServer.getRSRpcServices(); - this.rowCache = rsRpcServices == null ? null : rsRpcServices.getServer().getRowCache(); + initRowCache(); this.excludeDatanodeManager = this.regionServer.getWalFactory().getExcludeDatanodeManager(); this.period = regionServer.getConfiguration().getLong(HConstants.REGIONSERVER_METRICS_PERIOD, @@ -152,6 +151,11 @@ private void initMobFileCache() { this.mobFileCache = this.regionServer.getMobFileCache().orElse(null); } + private void initRowCache() { + RSRpcServices rsRpcServices = this.regionServer.getRSRpcServices(); + this.rowCache = rsRpcServices == null ? 
null : rsRpcServices.getServer().getRowCache(); + } + @Override public String getClusterId() { return regionServer.getClusterId(); From 267f4efba0c6e819365951bb3a41014d6c3f6c9e Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Mon, 6 Apr 2026 09:56:53 +0900 Subject: [PATCH 05/10] Add @RestrictedApi annotation to setRowCache() in HRegion --- .../java/org/apache/hadoop/hbase/regionserver/HRegion.java | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 3a5c3f34313a..863c14956e7a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -957,7 +957,8 @@ boolean checkRowCacheConfig() { : conf.getBoolean(HConstants.ROW_CACHE_ENABLED_KEY, HConstants.ROW_CACHE_ENABLED_DEFAULT); } - // For testing only + @RestrictedApi(explanation = "Should only be called in tests", link = "", + allowedOnPath = ".*/src/test/.*") void setRowCache(RowCache rowCache) { this.rowCache = rowCache; } From 1fc9d5c19c58c44c6a95d3fb7999863d75614773 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Mon, 6 Apr 2026 10:05:11 +0900 Subject: [PATCH 06/10] Migrate TestRowCache from JUnit4 to JUnit5 --- .../hbase/regionserver/TestRowCache.java | 66 +++++++++---------- 1 file changed, 30 insertions(+), 36 deletions(-) diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java index c4ca0d70faff..1cac5aa2b957 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java @@ -22,12 +22,13 @@ import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_EVICTED_ROW_COUNT; import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_HIT_COUNT; import static org.apache.hadoop.hbase.regionserver.MetricsRegionServerSource.ROW_CACHE_MISS_COUNT; -import static org.junit.Assert.assertArrayEquals; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertNotNull; -import static org.junit.Assert.assertNull; -import static org.junit.Assert.assertTrue; +import static org.junit.jupiter.api.Assertions.assertArrayEquals; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertFalse; +import static org.junit.jupiter.api.Assertions.assertNotNull; +import static org.junit.jupiter.api.Assertions.assertNull; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertTrue; import java.io.IOException; import java.util.ArrayList; @@ -37,7 +38,6 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CompatibilityFactory; import org.apache.hadoop.hbase.DoNotRetryIOException; -import org.apache.hadoop.hbase.HBaseClassTestRule; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.SingleProcessHBaseCluster; import org.apache.hadoop.hbase.TableName; @@ -61,22 +61,17 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.util.Bytes; -import 
org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.ClassRule; -import org.junit.Rule; -import org.junit.Test; -import org.junit.experimental.categories.Category; -import org.junit.rules.TestName; - -@Category({ RegionServerTests.class, MediumTests.class }) +import org.junit.jupiter.api.AfterAll; +import org.junit.jupiter.api.AfterEach; +import org.junit.jupiter.api.BeforeAll; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Tag; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.TestInfo; + +@Tag(RegionServerTests.TAG) +@Tag(MediumTests.TAG) public class TestRowCache { - @ClassRule - public static final HBaseClassTestRule CLASS_RULE = - HBaseClassTestRule.forClass(TestRowCache.class); - private static final HBaseTestingUtil TEST_UTIL = new HBaseTestingUtil(); private static final byte[] CF1 = Bytes.toBytes("cf1"); private static final byte[] CF2 = Bytes.toBytes("cf2"); @@ -95,10 +90,7 @@ public class TestRowCache { HRegion region; private final Map counterBase = new HashMap<>(); - @Rule - public TestName testName = new TestName(); - - @BeforeClass + @BeforeAll public static void beforeClass() throws Exception { Configuration conf = TEST_UTIL.getConfiguration(); @@ -118,20 +110,20 @@ public static void beforeClass() throws Exception { rowCache = regionServer.getRSRpcServices().getServer().getRowCache(); } - @AfterClass + @AfterAll public static void afterClass() throws Exception { HRegionServer.TEST_SKIP_REPORTING_TRANSITION = false; TEST_UTIL.shutdownMiniCluster(); } - @Before - public void beforeTestMethod() throws Exception { + @BeforeEach + public void beforeTestMethod(TestInfo testInfo) throws Exception { ColumnFamilyDescriptor cf1 = ColumnFamilyDescriptorBuilder.newBuilder(CF1).build(); // To test data block encoding ColumnFamilyDescriptor cf2 = ColumnFamilyDescriptorBuilder.newBuilder(CF2) .setDataBlockEncoding(DataBlockEncoding.FAST_DIFF).build(); - tableName = TableName.valueOf(testName.getMethodName()); + tableName = TableName.valueOf(testInfo.getTestMethod().get().getName()); TableDescriptor td = TableDescriptorBuilder.newBuilder(tableName).setRowCacheEnabled(true) .setColumnFamily(cf1).setColumnFamily(cf2).build(); admin.createTable(td); @@ -140,7 +132,7 @@ public void beforeTestMethod() throws Exception { .filter(r -> r.getRegionInfo().getTable().equals(tableName)).findFirst().orElseThrow(); } - @After + @AfterEach public void afterTestMethod() throws Exception { counterBase.clear(); @@ -282,14 +274,16 @@ public void testGetWithRowCache() throws IOException { assertNull(rowCache.getRow(rowCacheKey)); } - @Test(expected = DoNotRetryIOException.class) + @Test public void testPutWithTTL() throws IOException { // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot // track TTL expiration - Put put = new Put("row".getBytes()); - put.addColumn(CF1, Q1, "11".getBytes()); - put.setTTL(1); - table.put(put); + assertThrows(DoNotRetryIOException.class, () -> { + Put put = new Put("row".getBytes()); + put.addColumn(CF1, Q1, "11".getBytes()); + put.setTTL(1); + table.put(put); + }); } @Test From 6739f062accbde26671deaee01b59cfc1fb3d693 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Thu, 30 Apr 2026 20:59:33 +0900 Subject: [PATCH 07/10] Make RowCacheStrategy pluggable via configuration Introduce row.cache.strategy.class configuration key to allow operators to plug in custom RowCacheStrategy implementations. 
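For example (a sketch only; com.example.MyRowCacheStrategy is a hypothetical class used for illustration, not part of this patch), an operator could select a custom implementation programmatically:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.hbase.HBaseConfiguration;
    import org.apache.hadoop.hbase.HConstants;

    // The named class must implement RowCacheStrategy; the constructor
    // convention it must follow is described below.
    Configuration conf = HBaseConfiguration.create();
    conf.set(HConstants.ROW_CACHE_STRATEGY_CLASS_KEY, "com.example.MyRowCacheStrategy");

The same key can equally be set in hbase-site.xml.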
The default remains TinyLfuRowCacheStrategy. RowCacheStrategy implementations must now provide a public constructor that takes a Configuration argument, following the same convention used by MemStore and RegionSplitPolicy. --- .../src/main/java/org/apache/hadoop/hbase/HConstants.java | 5 +++++ .../org/apache/hadoop/hbase/regionserver/RowCache.java | 8 +++++--- .../hbase/regionserver/TinyLfuRowCacheStrategy.java | 8 +++++++- 3 files changed, 17 insertions(+), 4 deletions(-) diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java index f140783067af..329b8b4908bd 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java @@ -1035,6 +1035,11 @@ public enum OperationStatusCode { public static final String ROW_CACHE_EVICT_ON_CLOSE_KEY = "row.cache.evictOnClose"; public static final boolean ROW_CACHE_EVICT_ON_CLOSE_DEFAULT = false; + /** + * Configuration key for the row cache strategy class + */ + public static final String ROW_CACHE_STRATEGY_CLASS_KEY = "row.cache.strategy.class"; + /** * Configuration key for the memory size of the block cache */ diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java index 53c29269b4e4..2c859eab0f3e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java @@ -33,7 +33,7 @@ import org.apache.hadoop.hbase.client.Consistency; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Mutation; -import org.apache.hadoop.hbase.io.util.MemorySizeUtil; +import org.apache.hadoop.hbase.util.ReflectionUtils; /** * Facade for row-level caching in the RegionServer. 
@@ -85,8 +85,10 @@ <R> R execute(RowOperation<R> operation) throws IOException { RowCache(Configuration conf) { enabledByConf = conf.getFloat(HConstants.ROW_CACHE_SIZE_KEY, HConstants.ROW_CACHE_SIZE_DEFAULT) > 0; - // Currently we only support TinyLfu implementation - rowCacheStrategy = new TinyLfuRowCacheStrategy(MemorySizeUtil.getRowCacheSize(conf)); + Class<? extends RowCacheStrategy> strategyClass = conf.getClass( + HConstants.ROW_CACHE_STRATEGY_CLASS_KEY, TinyLfuRowCacheStrategy.class, + RowCacheStrategy.class); + rowCacheStrategy = ReflectionUtils.newInstance(strategyClass, conf); } <R> R mutateWithRowCacheBarrier(HRegion region, byte[] row, RowOperation<R> operation) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java index e141bd3cbb2b..a9f99b6d3edc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/TinyLfuRowCacheStrategy.java @@ -25,6 +25,8 @@ import java.util.Optional; import java.util.OptionalLong; import java.util.concurrent.atomic.LongAdder; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.io.util.MemorySizeUtil; import org.checkerframework.checker.nullness.qual.NonNull; @org.apache.yetus.audience.InterfaceAudience.Private @@ -42,7 +44,11 @@ public void onRemoval(RowCacheKey key, RowCells value, @NonNull RemovalCause cau // Cache.stats() does not provide eviction count for entries, so we maintain our own counter. private final LongAdder evictedRowCount = new LongAdder(); - TinyLfuRowCacheStrategy(long maxSizeBytes) { + public TinyLfuRowCacheStrategy(Configuration conf) { + this(MemorySizeUtil.getRowCacheSize(conf)); + } + + private TinyLfuRowCacheStrategy(long maxSizeBytes) { if (maxSizeBytes <= 0) { cache = Caffeine.newBuilder().maximumSize(0).build(); return; From e985135e33694b93a8e210a48cdb4836fa95c305 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Thu, 30 Apr 2026 21:16:40 +0900 Subject: [PATCH 08/10] Rename populateCache to cache and key region barrier by encoded name Address review feedback: - Rename RowCache.populateCache to cache. - Change regionLevelBarrierMap key type from HRegion to the encoded region name (String). The encoded name is the canonical region identifier already used elsewhere (e.g., RowCacheKey.isSameRegion). - The cache method no longer needs an HRegion parameter; it derives the encoded region name from the RowCacheKey. The external signatures of create/remove/getRegionLevelBarrier still take HRegion to make the caller's intent explicit; only the internal map key type changes.
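A sketch of the intended counting semantics, using the method names introduced in this series (the interleaving itself is illustrative, not a test from this patch):

    // Two bulk loads hit the same region concurrently; each raises the
    // barrier keyed by the region's encoded name.
    rowCache.createRegionLevelBarrier(region); // counter -> 1
    rowCache.createRegionLevelBarrier(region); // counter -> 2
    rowCache.removeRegionLevelBarrier(region); // counter -> 1, caching still blocked
    rowCache.removeRegionLevelBarrier(region); // counter -> 0, entry removed, caching can resume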
--- .../hadoop/hbase/regionserver/HRegion.java | 2 +- .../hadoop/hbase/regionserver/RowCache.java | 23 +++++++++++-------- .../hbase/regionserver/RowCacheKey.java | 4 ++++ 3 files changed, 18 insertions(+), 11 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 863c14956e7a..0785e23f76ef 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -3309,7 +3309,7 @@ RegionScannerImpl getScannerWithResults(Get get, Scan scan, List<Cell> results, // When results came from memstore only, do not populate the row cache boolean readFromMemStoreOnly = context.getBlockBytesScanned() < 1; if (!readFromMemStoreOnly) { - rowCache.populateCache(this, results, key); + rowCache.cache(results, key); } return scanner; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java index 2c859eab0f3e..420592024e27 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java @@ -60,9 +60,9 @@ public class RowCache { /** * A barrier that prevents the row cache from being populated during region operations, such as * bulk loads. It is implemented as a counter to address issues that arise when the same region is - * updated concurrently. + * updated concurrently. Keyed by the encoded region name. */ - private final Map<HRegion, AtomicInteger> regionLevelBarrierMap = new ConcurrentHashMap<>(); + private final Map<String, AtomicInteger> regionLevelBarrierMap = new ConcurrentHashMap<>(); /** * A barrier that prevents the row cache from being populated during row mutations. It is * implemented as a counter to address issues that arise when the same row is mutated @@ -234,9 +234,9 @@ boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) { return true; } - void populateCache(HRegion region, List<Cell> results, RowCacheKey key) { + void cache(List<Cell> results, RowCacheKey key) { // The row cache is populated only when no region level barriers remain - regionLevelBarrierMap.computeIfAbsent(region, t -> { + regionLevelBarrierMap.computeIfAbsent(key.getEncodedRegionName(), t -> { // The row cache is populated only when no row level barriers remain rowLevelBarrierMap.computeIfAbsent(key, k -> { try { @@ -251,7 +251,9 @@ void populateCache(HRegion region, List<Cell> results, RowCacheKey key) { } void createRegionLevelBarrier(HRegion region) { - regionLevelBarrierMap.computeIfAbsent(region, k -> new AtomicInteger(0)).incrementAndGet(); + regionLevelBarrierMap + .computeIfAbsent(region.getRegionInfo().getEncodedName(), k -> new AtomicInteger(0)) + .incrementAndGet(); } void increaseRowCacheSeqNum(HRegion region) { @@ -259,10 +261,11 @@ void removeRegionLevelBarrier(HRegion region) { - regionLevelBarrierMap.computeIfPresent(region, (k, counter) -> { - int remaining = counter.decrementAndGet(); - return (remaining <= 0) ? null : counter; - }); + regionLevelBarrierMap.computeIfPresent(region.getRegionInfo().getEncodedName(), + (k, counter) -> { + int remaining = counter.decrementAndGet(); + return (remaining <= 0) ?
null : counter; + }); } long getHitCount() { @@ -292,6 +295,6 @@ AtomicInteger getRowLevelBarrier(RowCacheKey key) { // For testing only AtomicInteger getRegionLevelBarrier(HRegion region) { - return regionLevelBarrierMap.get(region); + return regionLevelBarrierMap.get(region.getRegionInfo().getEncodedName()); } } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java index 09ec68194ea9..c6bcde41a7a4 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCacheKey.java @@ -41,6 +41,10 @@ public RowCacheKey(HRegion region, byte[] rowKey) { this.rowCacheSeqNum = region.getRowCacheSeqNum(); } + String getEncodedRegionName() { + return encodedRegionName; + } + @Override public boolean equals(Object o) { if (o == null || getClass() != o.getClass()) return false; From d8fef38bfc02906f0314397a588371be43fa6bf2 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Thu, 30 Apr 2026 21:49:23 +0900 Subject: [PATCH 09/10] Support cell-level TTL on Put with row cache by checking expiration on hit Address review feedback: instead of rejecting Puts with TTL on row cache-enabled tables, check TTL expiration when serving cache hits. - RowCells: precompute the earliest TTL expiration time across the contained cells during construction, exposing isExpired(now) for an O(1) check on each cache hit. Cells without a TTL tag yield Long.MAX_VALUE so the check short-circuits. - RowCache.tryGetFromCache: if the cached row is expired, evict it and fall back to the storage read path. - RowCache.cache: skip caching when results are empty. - HRegion.put: remove the guard that rejected Puts with TTL on row cache-enabled tables. Server-side cells preserve their TTL tag (carried forward by TagUtil.carryForwardTTLTag during mutation), so the same expiration check used by ScanQueryMatcher can be applied at the cache layer. CF-level TTL still disables the row cache via canCacheRow's isDefaultTtl check; that policy is unchanged. --- .../hadoop/hbase/regionserver/HRegion.java | 9 ---- .../hadoop/hbase/regionserver/RowCache.java | 13 ++++++ .../hadoop/hbase/regionserver/RowCells.java | 34 ++++++++++++++- .../hbase/regionserver/TestRowCache.java | 42 ++++++++++++++----- 4 files changed, 78 insertions(+), 20 deletions(-) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 0785e23f76ef..d07aa9e9755a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -3473,15 +3473,6 @@ private void updateDeleteLatestVersionTimestamp(Cell cell, Get get, int count, b @Override public void put(Put put) throws IOException { TraceUtil.trace(() -> { - // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot - // track TTL expiration - if (isRowCacheEnabled) { - if (put.getTTL() != Long.MAX_VALUE) { - throw new DoNotRetryIOException( - "Tables with row cache enabled do not allow setting TTL on Puts"); - } - } - checkReadOnly(); // Do a rough check that we have resources to accept a write. 
The check is diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java index 420592024e27..23c893b3a3b1 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCache.java @@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.client.Consistency; import org.apache.hadoop.hbase.client.Get; import org.apache.hadoop.hbase.client.Mutation; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.ReflectionUtils; /** @@ -230,11 +231,23 @@ boolean tryGetFromCache(RowCacheKey key, Get get, List<Cell> results) { return false; } + if (row.isExpired(EnvironmentEdgeManager.currentTime())) { + // A cell in the cached row has expired by its cell-level TTL. Drop the row from the cache + // and treat this as a miss so the caller falls back to the normal read path. + evictRow(key); + return false; + } + results.addAll(row.getCells()); return true; } void cache(List<Cell> results, RowCacheKey key) { + if (results.isEmpty()) { + // Nothing to cache; avoid creating an empty entry that would just be a cache hit returning + // an empty row. + return; + } // The row cache is populated only when no region level barriers remain regionLevelBarrierMap.computeIfAbsent(key.getEncodedRegionName(), t -> { // The row cache is populated only when no row level barriers remain diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java index af0a0ea4c537..7b29de61c9c2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/RowCells.java @@ -18,9 +18,13 @@ package org.apache.hadoop.hbase.regionserver; import java.util.ArrayList; +import java.util.Iterator; import java.util.List; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.ExtendedCell; +import org.apache.hadoop.hbase.PrivateCellUtil; +import org.apache.hadoop.hbase.Tag; +import org.apache.hadoop.hbase.TagType; import org.apache.hadoop.hbase.io.HeapSize; import org.apache.hadoop.hbase.util.ClassSize; @@ -29,19 +33,47 @@ public class RowCells implements HeapSize { public static final long FIXED_OVERHEAD = ClassSize.estimateBase(RowCells.class, false); private final List<ExtendedCell> cells = new ArrayList<>(); + /** + * Earliest expiration time among contained cells, derived from cell-level TTL tags. Set to + * {@link Long#MAX_VALUE} when no cell carries a TTL tag, which lets the row cache short-circuit + * the expiration check on every hit.
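+ * For example, a cell written at timestamp t that carries a TTL tag of d milliseconds expires + * at t + d, so the row stays servable from the cache only while the earliest such t + d across + * its cells has not yet passed; rows whose cells carry no TTL tag never expire from this check.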
+ */ + private final long earliestExpirationMs; public RowCells(List<Cell> cells) throws CloneNotSupportedException { + long earliest = Long.MAX_VALUE; for (Cell cell : cells) { if (!(cell instanceof ExtendedCell extCell)) { throw new CloneNotSupportedException("Cell is not an ExtendedCell"); } try { // To garbage collect the objects referenced by the cells - this.cells.add(extCell.deepClone()); + ExtendedCell cloned = extCell.deepClone(); + this.cells.add(cloned); + long exp = expirationTimeOf(cloned); + if (exp < earliest) { + earliest = exp; + } } catch (RuntimeException e) { throw new CloneNotSupportedException("Deep clone failed"); } } + this.earliestExpirationMs = earliest; + } + + private static long expirationTimeOf(ExtendedCell cell) { + Iterator<Tag> i = PrivateCellUtil.tagsIterator(cell); + while (i.hasNext()) { + Tag t = i.next(); + if (TagType.TTL_TAG_TYPE == t.getType()) { + return cell.getTimestamp() + Tag.getValueAsLong(t); + } + } + return Long.MAX_VALUE; + } + + public boolean isExpired(long now) { + return earliestExpirationMs < now; } @Override diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java index 1cac5aa2b957..239c40f01687 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/regionserver/TestRowCache.java @@ -37,7 +37,6 @@ import java.util.Map; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.CompatibilityFactory; -import org.apache.hadoop.hbase.DoNotRetryIOException; import org.apache.hadoop.hbase.HBaseTestingUtil; import org.apache.hadoop.hbase.SingleProcessHBaseCluster; import org.apache.hadoop.hbase.TableName; @@ -61,6 +60,8 @@ import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.testclassification.RegionServerTests; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.hbase.util.ManualEnvironmentEdge; import org.junit.jupiter.api.AfterAll; import org.junit.jupiter.api.AfterEach; import org.junit.jupiter.api.BeforeAll; @@ -275,15 +276,36 @@ public void testGetWithRowCache() throws IOException { } @Test - public void testPutWithTTL() throws IOException { - // Put with TTL is not allowed on tables with row cache enabled, because cached rows cannot - // track TTL expiration - assertThrows(DoNotRetryIOException.class, () -> { - Put put = new Put("row".getBytes()); - put.addColumn(CF1, Q1, "11".getBytes()); - put.setTTL(1); - table.put(put); - }); + public void testPutWithTTL() throws Exception { + // Cell-level TTL set via Put.setTTL is supported: the cached row is invalidated on hit when + // the cell has expired.
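+ // Timeline exercised below: put at time T with a 60s TTL and flush, populate the cache with + // a first get, advance the injected clock 120s past T, then expect the next get to detect the + // expiration, evict the cached row, and return an empty result.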
+ byte[] rowKey = "row".getBytes(); + RowCacheKey rowCacheKey = new RowCacheKey(region, rowKey); + + Put put = new Put(rowKey).addColumn(CF1, Q1, "v".getBytes()); + put.setTTL(60_000); + table.put(put); + // Flush so that the next Get reads from HFile (memstore-only reads do not populate the cache) + admin.flush(tableName); + + // First Get populates the cache + Result first = table.get(new Get(rowKey)); + assertFalse(first.isEmpty()); + assertNotNull(rowCache.getRow(rowCacheKey)); + + // Advance time beyond the cell TTL + ManualEnvironmentEdge edge = new ManualEnvironmentEdge(); + edge.setValue(EnvironmentEdgeManager.currentTime() + 120_000); + EnvironmentEdgeManager.injectEdge(edge); + try { + // Cache hit detects expiration, evicts the row, and falls back to the read path. The + // storage path also filters the expired cell, so the result is empty. + Result second = table.get(new Get(rowKey)); + assertTrue(second.isEmpty()); + assertNull(rowCache.getRow(rowCacheKey)); + } finally { + EnvironmentEdgeManager.reset(); + } } @Test From 8b89b7cb58f689f3e0d03d0d84f6c3201aa88268 Mon Sep 17 00:00:00 2001 From: "terence.yoo" Date: Thu, 30 Apr 2026 21:56:09 +0900 Subject: [PATCH 10/10] Sync dev-support scripts from master The CI workflows invoke scripts under dev-support/ that were missing or outdated on this branch, causing GHA runs to fail at startup. --- dev-support/Jenkinsfile | 446 +-------------- dev-support/create-release/README.txt | 3 +- dev-support/create-release/release-build.sh | 2 + dev-support/create-release/release-util.sh | 2 +- ...on_instead_of_inheritance-HBASE-17732.adoc | 4 +- dev-support/docker/Dockerfile | 11 + .../flaky-tests/python-requirements.txt | 4 +- dev-support/flaky-tests/report-flakies.py | 53 +- .../generate-website/build-hbase-website.sh | 250 ++++++++ .../generate-hbase-website.Jenkinsfile | 137 +++++ dev-support/gh_hide_old_comments.sh | 96 +++- .../git-jira-release-audit/requirements.txt | 10 +- ...-backwards-compatibility-check.Jenkinsfile | 326 +++++++++++ dev-support/hbase-personality.sh | 92 +-- dev-support/hbase-vote.sh | 4 +- .../integration-test.Jenkinsfile | 404 +++++++++++++ dev-support/integration-test/patch-hadoop3.sh | 24 + .../pseudo-distributed-test.sh | 540 ++++++++++++++++++ .../integration-test/source-artifact.sh | 242 ++++++++ dev-support/jenkins_precommit_github_yetus.sh | 30 +- dev-support/make_rc.sh | 2 +- dev-support/spotbugs-exclude.xml | 12 + dev-support/yetus_console_to_md.py | 522 +++++++++++++++++ 23 files changed, 2680 insertions(+), 536 deletions(-) create mode 100755 dev-support/generate-website/build-hbase-website.sh create mode 100644 dev-support/generate-website/generate-hbase-website.Jenkinsfile create mode 100644 dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile create mode 100644 dev-support/integration-test/integration-test.Jenkinsfile create mode 100755 dev-support/integration-test/patch-hadoop3.sh create mode 100755 dev-support/integration-test/pseudo-distributed-test.sh create mode 100755 dev-support/integration-test/source-artifact.sh create mode 100644 dev-support/yetus_console_to_md.py diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile index c550272cc3f8..f22e67d04e83 100644 --- a/dev-support/Jenkinsfile +++ b/dev-support/Jenkinsfile @@ -38,7 +38,6 @@ pipeline { OUTPUT_DIR_RELATIVE_JDK8_HADOOP3 = 'output-jdk8-hadoop3' OUTPUT_DIR_RELATIVE_JDK11_HADOOP3 = 'output-jdk11-hadoop3' OUTPUT_DIR_RELATIVE_JDK17_HADOOP3 = 'output-jdk17-hadoop3' - OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS = 
'output-jdk17-hadoop3-backwards' PROJECT = 'hbase' PROJECT_PERSONALITY = 'https://raw.githubusercontent.com/apache/hbase/master/dev-support/hbase-personality.sh' @@ -58,9 +57,6 @@ pipeline { ASF_NIGHTLIES = 'https://nightlies.apache.org' ASF_NIGHTLIES_BASE_ORI = "${ASF_NIGHTLIES}/hbase/${JOB_NAME}/${BUILD_NUMBER}" ASF_NIGHTLIES_BASE = "${ASF_NIGHTLIES_BASE_ORI.replaceAll(' ', '%20')}" - // These are dependent on the branch - HADOOP3_VERSIONS = "3.3.5,3.3.6,3.4.0,3.4.1,3.4.2" - HADOOP3_DEFAULT_VERSION = "3.4.2" } parameters { booleanParam(name: 'USE_YETUS_PRERELEASE', defaultValue: false, description: '''Check to use the current HEAD of apache/yetus rather than our configured release. @@ -85,7 +81,7 @@ pipeline { // can't just do a simple echo or the directory won't be created. :( sh '''#!/usr/bin/env bash echo "Make sure we have a directory for downloading dependencies: $(pwd)" -''' + ''' } sh '''#!/usr/bin/env bash set -e @@ -127,77 +123,8 @@ pipeline { } stash name: 'yetus', includes: "yetus-*/*,yetus-*/**/*,tools/personality.sh" } - } - stage ('hadoop 2 cache') { - environment { - HADOOP2_VERSION="2.10.2" - } - steps { - // directory must be unique for each parallel stage, because jenkins runs them in the same workspace :( - dir('downloads-hadoop-2') { - sh '''#!/usr/bin/env bash - echo "Make sure we have a directory for downloading dependencies: $(pwd)" -''' - } - sh '''#!/usr/bin/env bash - set -e - echo "Ensure we have a copy of Hadoop ${HADOOP2_VERSION}" - "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \ - --working-dir "${WORKSPACE}/downloads-hadoop-2" \ - --keys 'https://downloads.apache.org/hadoop/common/KEYS' \ - --verify-tar-gz \ - "${WORKSPACE}/hadoop-${HADOOP2_VERSION}-bin.tar.gz" \ - "hadoop/common/hadoop-${HADOOP2_VERSION}/hadoop-${HADOOP2_VERSION}.tar.gz" - for stale in $(ls -1 "${WORKSPACE}"/hadoop-2*.tar.gz | grep -v ${HADOOP2_VERSION}); do - echo "Delete stale hadoop 2 cache ${stale}" - rm -rf $stale - done - ''' - stash name: 'hadoop-2', includes: "hadoop-${HADOOP2_VERSION}-bin.tar.gz" - } - } - stage ('hadoop 3 cache') { - steps { - script { - hadoop3_versions = env.HADOOP3_VERSIONS.split(","); - env.HADOOP3_VERSIONS_REGEX = "[" + hadoop3_versions.join("|") + "]"; - for (hadoop3_version in hadoop3_versions) { - env.HADOOP3_VERSION = hadoop3_version; - echo "env.HADOOP3_VERSION" + env.hadoop3_version; - stage ('Hadoop 3 cache inner stage') { - // directory must be unique for each parallel stage, because jenkins runs them in the same workspace :( - dir("downloads-hadoop-${HADOOP3_VERSION}") { - sh '''#!/usr/bin/env bash - echo "Make sure we have a directory for downloading dependencies: $(pwd)" -''' - } //dir - sh '''#!/usr/bin/env bash - set -e - echo "Ensure we have a copy of Hadoop ${HADOOP3_VERSION}" - "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \ - --working-dir "${WORKSPACE}/downloads-hadoop-${HADOOP3_VERSION}" \ - --keys 'https://downloads.apache.org/hadoop/common/KEYS' \ - --verify-tar-gz \ - "${WORKSPACE}/hadoop-${HADOOP3_VERSION}-bin.tar.gz" \ - "hadoop/common/hadoop-${HADOOP3_VERSION}/hadoop-${HADOOP3_VERSION}.tar.gz" - for stale in $(ls -1 "${WORKSPACE}"/hadoop-3*.tar.gz | grep -v ${HADOOP3_VERSION}); do - echo "Delete stale hadoop 3 cache ${stale}" - rm -rf $stale - done - ''' - stash name: "hadoop-${HADOOP3_VERSION}", includes: "hadoop-${HADOOP3_VERSION}-bin.tar.gz" - script { - if (env.HADOOP3_VERSION == env.HADOOP3_DEFAULT_VERSION) { - // FIXME: we never unstash this, 
because we run the packaging tests with the version-specific stashes - stash(name: "hadoop-3", includes: "hadoop-${HADOOP3_VERSION}-bin.tar.gz") - } //if - } //script - } //stage ('Hadoop 3 cache inner stage') - } //for - } //script - } //steps - } //stage ('hadoop 3 cache') { - } //parallel + } // stage ('yetus install') + } // parallel } //stage ('thirdparty installs') stage ('init health results') { steps { @@ -208,13 +135,6 @@ pipeline { stash name: 'jdk8-hadoop3-result', allowEmpty: true, includes: "${OUTPUT_DIR_RELATIVE_JDK8_HADOOP3}/doesn't-match" stash name: 'jdk11-hadoop3-result', allowEmpty: true, includes: "${OUTPUT_DIR_RELATIVE_JDK11_HADOOP3}/doesn't-match" stash name: 'jdk17-hadoop3-result', allowEmpty: true, includes: "${OUTPUT_DIR_RELATIVE_JDK17_HADOOP3}/doesn't-match" - script { - for (hadoop3_version in hadoop3_versions) { - // confusing environment vs Groovy variables - stash(name: "jdk17-hadoop3-backwards-result-${hadoop3_version}", allowEmpty: true, includes: "${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${hadoop3_version}/doesn't-match") - } - } - stash name: 'srctarball-result', allowEmpty: true, includes: "output-srctarball/doesn't-match" } } stage ('health checks') { @@ -771,352 +691,6 @@ pipeline { } } } - // If/when we transition to transient runners, we could run every Hadoop check as a matrix job - stage ('yetus jdk17 hadoop3 backwards compatibility checks') { - agent { - node { - label 'hbase' - } - } - environment { - BASEDIR = "${env.WORKSPACE}/component" - TESTS = "${env.DEEP_CHECKS}" - SET_JAVA_HOME = "/usr/lib/jvm/java-17" - // Activates hadoop 3.0 profile in maven runs. - HADOOP_PROFILE = '3.0' - // HADOOP_THREE_VERSION is set in script for loop - TEST_PROFILE = 'runDevTests' - SKIP_ERRORPRONE = true - } - steps { - script { - for (hadoop3_version in hadoop3_versions) { - if (hadoop3_version == env.HADOOP3_DEFAULT_VERSION) { - // We are running the full test suite, no need to run the dev tests too - continue - } - //HADOOP_THREE_VERSION is the environment variable name expected by the nightly shell script - env.HADOOP_THREE_VERSION = hadoop3_version; - env.OUTPUT_DIR_RELATIVE = "${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${env.HADOOP_THREE_VERSION}" - env.OUTPUT_DIR = "${env.WORKSPACE}/${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${env.HADOOP_THREE_VERSION}" - try { - stage ('yetus jdk17 hadoop3 backwards compatibility checks inner stage') { - // Must do prior to anything else, since if one of them timesout we'll stash the commentfile - sh '''#!/usr/bin/env bash - set -e - rm -rf "${OUTPUT_DIR}" && mkdir "${OUTPUT_DIR}" - rm -f "${OUTPUT_DIR}/commentfile" - ''' - unstash 'yetus' - dir('component') { - checkout scm - } - sh '''#!/usr/bin/env bash - set -e - rm -rf "${OUTPUT_DIR}/machine" && mkdir "${OUTPUT_DIR}/machine" - "${BASEDIR}/dev-support/gather_machine_environment.sh" "${OUTPUT_DIR_RELATIVE}/machine" - echo "got the following saved stats in '${OUTPUT_DIR_RELATIVE}/machine'" - ls -lh "${OUTPUT_DIR_RELATIVE}/machine" - ''' - script { - def ret = sh( - returnStatus: true, - script: '''#!/usr/bin/env bash - set -e - declare -i status=0 - if "${BASEDIR}/dev-support/hbase_nightly_yetus.sh" ; then - echo "(/) {color:green}+1 jdk17 hadoop ${HADOOP_THREE_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile" - else - echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP_THREE_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile" - status=1 - fi - echo "-- For more information [see jdk17 
report|${BUILD_URL}JDK17_20Nightly_20Build_20Report_20_28Hadoop3_29/]" >> "${OUTPUT_DIR}/commentfile" - exit "${status}" - ''' - ) - if (ret != 0) { - // mark the build as UNSTABLE instead of FAILURE, to avoid skipping the later publish of - // test output. See HBASE-26339 for more details. - currentBuild.result = 'UNSTABLE' - } - } //script - } //stage ('yetus jdk17 hadoop3 backwards compatibility checks inner stage') { - } //try - finally { - stash name: "jdk17-hadoop3-backwards-result-${HADOOP_THREE_VERSION}", includes: "${OUTPUT_DIR_RELATIVE}/commentfile" - junit testResults: "${env.OUTPUT_DIR_RELATIVE}/**/target/**/TEST-*.xml", allowEmptyResults: true - // zip surefire reports. - sh '''#!/bin/bash -e - if [ ! -f "${OUTPUT_DIR}/commentfile" ]; then - echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP_THREE_VERSION} backward compatibility checks{color}" >"${OUTPUT_DIR}/commentfile" - echo "-- Something went wrong running this stage, please [check relevant console output|${BUILD_URL}/console]." >> "${OUTPUT_DIR}/commentfile" - fi - if [ -d "${OUTPUT_DIR}/archiver" ]; then - count=$(find "${OUTPUT_DIR}/archiver" -type f | wc -l) - if [[ 0 -ne ${count} ]]; then - echo "zipping ${count} archived files" - zip -q -m -r "${OUTPUT_DIR}/test_logs.zip" "${OUTPUT_DIR}/archiver" - else - echo "No archived files, skipping compressing." - fi - else - echo "No archiver directory, skipping compressing." - fi - ''' - sshPublisher(publishers: [ - sshPublisherDesc(configName: 'Nightlies', - transfers: [ - sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}", - sourceFiles: "${env.OUTPUT_DIR_RELATIVE}/test_logs.zip" - ) - ] - ) - ]) - // remove the big test logs zip file, store the nightlies url in test_logs.html - sh '''#!/bin/bash -e - if [ -f "${OUTPUT_DIR}/test_logs.zip" ]; then - echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space" - rm -rf "${OUTPUT_DIR}/test_logs.zip" - python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html" - else - echo "No test_logs.zip, skipping" - fi - ''' - // Has to be relative to WORKSPACE. - archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/*" - archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/**/*" - publishHTML target: [ - allowMissing : true, - keepAll : true, - alwaysLinkToLastBuild: true, - // Has to be relative to WORKSPACE. - reportDir : "${env.OUTPUT_DIR_RELATIVE}", - reportFiles : 'console-report.html', - reportName : "JDK17 Nightly Build Report (Hadoop ${HADOOP_THREE_VERSION} backwards compatibility)" - ] - } //finally - } // for - } //script - } //steps - } //stage ('yetus jdk17 hadoop3 backwards compatibility checks') - - // This is meant to mimic what a release manager will do to create RCs. 
- // See http://hbase.apache.org/book.html#maven.release - // TODO (HBASE-23870): replace this with invocation of the release tool - stage ('packaging and integration') { - agent { - node { - label 'hbase' - } - } - environment { - BASEDIR = "${env.WORKSPACE}/component" - BRANCH = "${env.BRANCH_NAME}" - } - steps { - dir('component') { - checkout scm - } - sh '''#!/bin/bash -e - echo "Setting up directories" - rm -rf "output-srctarball" && mkdir "output-srctarball" - rm -rf "output-integration" && mkdir "output-integration" "output-integration/hadoop-2" "output-integration/hadoop-3" "output-integration/hadoop-3-shaded" - rm -rf "unpacked_src_tarball" && mkdir "unpacked_src_tarball" - rm -rf "hbase-install" && mkdir "hbase-install" - rm -rf "hbase-client" && mkdir "hbase-client" - rm -rf "hbase-hadoop3-install" - rm -rf "hbase-hadoop3-client" - rm -rf "hadoop-2" && mkdir "hadoop-2" - rm -rf "hadoop-3" && mkdir "hadoop-3" - rm -rf ".m2-for-repo" && mkdir ".m2-for-repo" - rm -rf ".m2-for-src" && mkdir ".m2-for-src" - # remove old hadoop tarballs in workspace - rm -rf hadoop-2*.tar.gz - rm -rf hadoop-3*.tar.gz - rm -f "output-integration/commentfile" - ''' - sh '''#!/usr/bin/env bash - set -e - rm -rf "output-srctarball/machine" && mkdir "output-srctarball/machine" - "${BASEDIR}/dev-support/gather_machine_environment.sh" "output-srctarball/machine" - echo "got the following saved stats in 'output-srctarball/machine'" - ls -lh "output-srctarball/machine" - ''' - sh '''#!/bin/bash -e - echo "Checking the steps for an RM to make a source artifact, then a binary artifact." - docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" . - docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \ - -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" --workdir=/hbase hbase-integration-test \ - "component/dev-support/hbase_nightly_source-artifact.sh" \ - --intermediate-file-dir output-srctarball \ - --unpack-temp-dir unpacked_src_tarball \ - --maven-m2-initial .m2-for-repo \ - --maven-m2-src-build .m2-for-src \ - --clean-source-checkout \ - component - if [ $? -eq 0 ]; then - echo '(/) {color:green}+1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile - else - echo '(x) {color:red}-1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile - exit 1 - fi - ''' - echo "unpacking the hbase bin tarball into 'hbase-install' and the client tarball into 'hbase-client'" - sh '''#!/bin/bash -e - if [ 2 -ne $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v hadoop3 | wc -l) ]; then - echo '(x) {color:red}-1 testing binary artifact{color}\n-- source tarball did not produce the expected binaries.' 
>>output-srctarball/commentfile - exit 1 - fi - install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v client-bin | grep -v hadoop3) - tar --strip-component=1 -xzf "${install_artifact}" -C "hbase-install" - client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-client-bin.tar.gz | grep -v hadoop3) - tar --strip-component=1 -xzf "${client_artifact}" -C "hbase-client" - if [ 2 -eq $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | wc -l) ]; then - echo "hadoop3 artifacts available, unpacking the hbase hadoop3 bin tarball into 'hbase-hadoop3-install' and the client hadoop3 tarball into 'hbase-hadoop3-client'" - mkdir hbase-hadoop3-install - mkdir hbase-hadoop3-client - hadoop3_install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | grep -v client-bin) - tar --strip-component=1 -xzf "${hadoop3_install_artifact}" -C "hbase-hadoop3-install" - hadoop3_client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-client-bin.tar.gz) - tar --strip-component=1 -xzf "${hadoop3_client_artifact}" -C "hbase-hadoop3-client" - fi - ''' - unstash 'hadoop-2' - sh '''#!/bin/bash -xe - if [[ "${BRANCH}" == *"branch-2"* ]]; then - echo "Attempting to use run an instance on top of Hadoop 2." - artifact=$(ls -1 "${WORKSPACE}"/hadoop-2*.tar.gz | head -n 1) - tar --strip-components=1 -xzf "${artifact}" -C "hadoop-2" - docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" . - docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \ - -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-8" --workdir=/hbase hbase-integration-test \ - component/dev-support/hbase_nightly_pseudo-distributed-test.sh \ - --single-process \ - --working-dir output-integration/hadoop-2 \ - --hbase-client-install "hbase-client" \ - hbase-install \ - hadoop-2/bin/hadoop \ - hadoop-2/share/hadoop/yarn/timelineservice \ - hadoop-2/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \ - hadoop-2/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \ - hadoop-2/bin/mapred \ - >output-integration/hadoop-2.log 2>&1 - if [ $? -ne 0 ]; then - echo "(x) {color:red}-1 client integration test{color}\n--Failed when running client tests on top of Hadoop 2. [see log for details|${BUILD_URL}/artifact/output-integration/hadoop-2.log]. (note that this means we didn't run on Hadoop 3)" >output-integration/commentfile - exit 2 - fi - echo "(/) {color:green}+1 client integration test for HBase 2 {color}" >output-integration/commentfile - else - echo "Skipping to run against Hadoop 2 for branch ${BRANCH}" - fi - ''' - script { - for (hadoop3_version in hadoop3_versions) { - env.HADOOP3_VERSION = hadoop3_version; - echo "env.HADOOP3_VERSION" + env.hadoop3_version; - stage ("packaging and integration Hadoop 3 inner stage ") { - unstash "hadoop-" + env.HADOOP3_VERSION - sh '''#!/bin/bash -e - echo "Attempting to use run an instance on top of Hadoop ${HADOOP3_VERSION}." 
- # Clean up any previous tested Hadoop3 files before unpacking the current one - rm -rf hadoop-3/* - # Create working dir - rm -rf "output-integration/hadoop-${HADOOP3_VERSION}" && mkdir "output-integration/hadoop-${HADOOP3_VERSION}" - rm -rf "output-integration/hadoop-${HADOOP3_VERSION}-shaded" && mkdir "output-integration/hadoop-${HADOOP3_VERSION}-shaded" - artifact=$(ls -1 "${WORKSPACE}"/hadoop-${HADOOP3_VERSION}-bin.tar.gz | head -n 1) - tar --strip-components=1 -xzf "${artifact}" -C "hadoop-3" - # we need to patch some files otherwise minicluster will fail to start, see MAPREDUCE-7471 - ${BASEDIR}/dev-support/patch-hadoop3.sh hadoop-3 - hbase_install_dir="hbase-install" - hbase_client_dir="hbase-client" - if [ -d "hbase-hadoop3-install" ]; then - echo "run hadoop3 client integration test against hbase hadoop3 binaries" - hbase_install_dir="hbase-hadoop3-install" - hbase_client_dir="hbase-hadoop3-client" - fi - docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" . - docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \ - -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" \ - -e HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" \ - --workdir=/hbase hbase-integration-test \ - component/dev-support/hbase_nightly_pseudo-distributed-test.sh \ - --single-process \ - --working-dir output-integration/hadoop-${HADOOP3_VERSION} \ - --hbase-client-install ${hbase_client_dir} \ - ${hbase_install_dir} \ - hadoop-3/bin/hadoop \ - hadoop-3/share/hadoop/yarn/timelineservice \ - hadoop-3/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \ - hadoop-3/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \ - hadoop-3/bin/mapred \ - >output-integration/hadoop-${HADOOP3_VERSION}.log 2>&1 - if [ $? -ne 0 ]; then - echo "(x) {color:red}-1 client integration test{color}\n--Failed when running client tests on top of Hadoop ${HADOOP3_VERSION}. [see log for details|${BUILD_URL}/artifact/output-integration/hadoop-${HADOOP3_VERSION}.log]. (note that this means we didn't check the Hadoop ${HADOOP3_VERSION} shaded client)" >> output-integration/commentfile - exit 2 - fi - echo "Attempting to use run an instance on top of Hadoop ${HADOOP3_VERSION}, relying on the Hadoop client artifacts for the example client program." - docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \ - -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" \ - -e HADOOP_OPTS="--add-opens java.base/java.lang=ALL-UNNAMED" \ - --workdir=/hbase hbase-integration-test \ - component/dev-support/hbase_nightly_pseudo-distributed-test.sh \ - --single-process \ - --hadoop-client-classpath hadoop-3/share/hadoop/client/hadoop-client-api-*.jar:hadoop-3/share/hadoop/client/hadoop-client-runtime-*.jar \ - --working-dir output-integration/hadoop-${HADOOP3_VERSION}-shaded \ - --hbase-client-install ${hbase_client_dir} \ - ${hbase_install_dir} \ - hadoop-3/bin/hadoop \ - hadoop-3/share/hadoop/yarn/timelineservice \ - hadoop-3/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \ - hadoop-3/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \ - hadoop-3/bin/mapred \ - >output-integration/hadoop-${HADOOP3_VERSION}-shaded.log 2>&1 - if [ $? -ne 0 ]; then - echo "(x) {color:red}-1 client integration test{color}\n--Failed when running client tests on top of Hadoop ${HADOOP3_VERSION} using Hadoop's shaded client. 
[see log for details|${BUILD_URL}/artifact/output-integration/hadoop-${HADOOP3_VERSION}-shaded.log]." >> output-integration/commentfile
-                  exit 2
-                fi
-                echo "(/) {color:green}+1 client integration test for ${HADOOP3_VERSION} {color}" >> output-integration/commentfile
-              '''
-            } //stage ("packaging and integration Hadoop 3 inner stage ")
-          } //for
-        } // script
-      } //steps
-      post {
-        always {
-          sh '''#!/bin/bash -e
-            if [ ! -f "output-integration/commentfile" ]; then
-              echo "(x) {color:red}-1 source release artifact{color}\n-- Something went wrong with this stage, [check relevant console output|${BUILD_URL}/console]." >output-srctarball/commentfile
-              echo "(x) {color:red}-1 client integration test{color}\n-- Something went wrong with this stage, [check relevant console output|${BUILD_URL}/console]." >output-integration/commentfile
-            fi
-          '''
-          stash name: 'srctarball-result', includes: "output-srctarball/commentfile,output-integration/commentfile"
-          sshPublisher(publishers: [
-            sshPublisherDesc(configName: 'Nightlies',
-              transfers: [
-                sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
-                  sourceFiles: "output-srctarball/hbase-src.tar.gz"
-                )
-              ]
-            )
-          ])
-          // remove the big src tarball, store the nightlies url in hbase-src.html
-          sh '''#!/bin/bash -e
-            SRC_TAR="${WORKSPACE}/output-srctarball/hbase-src.tar.gz"
-            if [ -f "${SRC_TAR}" ]; then
-              echo "Remove ${SRC_TAR} for saving space"
-              rm -rf "${SRC_TAR}"
-              python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
-            else
-              echo "No hbase-src.tar.gz, skipping"
-            fi
-          '''
-          archiveArtifacts artifacts: 'output-srctarball/*'
-          archiveArtifacts artifacts: 'output-srctarball/**/*'
-          archiveArtifacts artifacts: 'output-integration/*'
-          archiveArtifacts artifacts: 'output-integration/**/*'
-        } //always
-      } //post
-    } //stage packaging
   } // parallel
 } //stage:_health checks
 } //stages
@@ -1133,32 +707,18 @@ pipeline {
           rm -rf ${OUTPUT_DIR_RELATIVE_JDK8_HADOOP3}
           rm -rf ${OUTPUT_DIR_RELATIVE_JDK11_HADOOP3}
           rm -rf ${OUTPUT_DIR_RELATIVE_JDK17_HADOOP3}
-          rm -rf ${OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-*
-          rm -rf output-srctarball
-          rm -rf output-integration
         '''
         unstash 'general-result'
         unstash 'jdk8-hadoop2-result'
         unstash 'jdk8-hadoop3-result'
         unstash 'jdk11-hadoop3-result'
         unstash 'jdk17-hadoop3-result'
-        unstash 'srctarball-result'
         def results = ["${env.OUTPUT_DIR_RELATIVE_GENERAL}/commentfile",
                        "${env.OUTPUT_DIR_RELATIVE_JDK8_HADOOP2}/commentfile",
                        "${env.OUTPUT_DIR_RELATIVE_JDK8_HADOOP3}/commentfile",
                        "${env.OUTPUT_DIR_RELATIVE_JDK11_HADOOP3}/commentfile",
                        "${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3}/commentfile"]
-        for (hadoop3_version in hadoop3_versions) {
-          if (hadoop3_version == env.HADOOP3_DEFAULT_VERSION) {
-            // We haven't run these tests
-            continue
-          }
-          unstash("jdk17-hadoop3-backwards-result-${hadoop3_version}")
-          results.add("${env.OUTPUT_DIR_RELATIVE_JDK17_HADOOP3_BACKWARDS}-${hadoop3_version}/commentfile")
-        }
-        results.add('output-srctarball/commentfile')
-        results.add('output-integration/commentfile')
         echo env.BRANCH_NAME
         echo env.BUILD_URL
         echo currentBuild.result
diff --git a/dev-support/create-release/README.txt b/dev-support/create-release/README.txt
index f679a47cfb33..14c2b7d458db 100644
--- a/dev-support/create-release/README.txt
+++ b/dev-support/create-release/README.txt
@@ -32,7 +32,8 @@ to remove dry-run mode.
 Before starting the RC build, run a reconciliation of what is in JIRA
 with what is in the commit log.
 Make sure they align and that anomalies are
-explained up in JIRA. See http://hbase.apache.org/book.html#maven.release
+explained up in JIRA.
+See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate for how.
 
 Regardless of where your release build will run (locally, locally in docker,
diff --git a/dev-support/create-release/release-build.sh b/dev-support/create-release/release-build.sh
index cc6a5818c19c..8fdf07eaba53 100755
--- a/dev-support/create-release/release-build.sh
+++ b/dev-support/create-release/release-build.sh
@@ -101,6 +101,8 @@ fi
 init_locale
 init_java
+# set Java 17 for spotless
+set_java17_home
 init_mvn
 init_python
 # Print out subset of perl version (used in git hooks and japi-compliance-checker)
diff --git a/dev-support/create-release/release-util.sh b/dev-support/create-release/release-util.sh
index fd11ae853efb..02020d93de01 100755
--- a/dev-support/create-release/release-util.sh
+++ b/dev-support/create-release/release-util.sh
@@ -969,7 +969,7 @@ function get_hadoop3_version() {
 # case spotless:check failure, so we should run spotless:apply before committing
 function maven_spotless_apply() {
   # our spotless plugin version requires at least java 11 to run, so we use java 17 here
-  JAVA_HOME="/usr/lib/jvm/java-17-openjdk-amd64" "${MVN[@]}" spotless:apply
+  JAVA_HOME="${JAVA17_HOME}" "${MVN[@]}" spotless:apply
 }
 
 function git_add_poms() {
diff --git a/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc b/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc
index 2476f8a47825..8d588794efef 100644
--- a/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc
+++ b/dev-support/design-docs/Coprocessor_Design_Improvements-Use_composition_instead_of_inheritance-HBASE-17732.adoc
@@ -49,7 +49,7 @@ logic to internal code paths.
 [[background]]
 == Background
 
-Coprocessors are well link:http://hbase.apache.org/book.html#cp[documented in the refguide].
+Coprocessors are well link:https://hbase.apache.org/docs/cp[documented in the refguide].
 Here we give a little background information on involved classes, their
 responsibilities, and relationship to each other.
 
@@ -59,7 +59,7 @@ relationship to each other.
 *** All *Observer* interfaces derive from Coprocessor interface.
 **** Coprocessor Interface is a _Marker _Interface. It just has start/stop methods and enums for
 stages in the Coprocessor Lifecycle.
-** http://hbase.apache.org/book.html#_observer_coprocessors[Observers] (interface)
+** https://hbase.apache.org/docs/cp#observer-coprocessors[Observers] (interface)
 *** Contain hooks which third-party programs can override to inject functionality in various
 internal code paths. For e.g preCreateTable(...) will be called just before any table is created.
 *** Current set of observers: _MasterObserver, RegionObserver, RegionServerObserver, WALObserver,
diff --git a/dev-support/docker/Dockerfile b/dev-support/docker/Dockerfile
index 26b2c35b3462..294fc272f1c9 100644
--- a/dev-support/docker/Dockerfile
+++ b/dev-support/docker/Dockerfile
@@ -24,6 +24,8 @@ FROM ubuntu:22.04 AS base_image
 
 SHELL ["/bin/bash", "-o", "pipefail", "-c"]
 
+ARG NODE_VERSION=v20.15.0
+
 RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && \
   DEBIAN_FRONTEND=noninteractive apt-get -qq install --no-install-recommends -y \
     ca-certificates=20211016 \
@@ -55,6 +57,7 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && \
     shellcheck='0.8.0-*' \
     libxml2-dev='2.9.13+dfsg-*' \
     libxml2-utils='2.9.13+dfsg-*' \
+    zip='3.0-*' \
     && \
   apt-get clean && \
   rm -rf /var/lib/apt/lists/* \
@@ -70,6 +73,14 @@ RUN DEBIAN_FRONTEND=noninteractive apt-get -qq update && \
   locale-gen en_US.UTF-8
 ENV LANG=en_US.UTF-8 LANGUAGE=en_US:en LC_ALL=en_US.UTF-8
 
+# Install Node.js. Needed for Playwright.
+RUN curl -fsSL "https://nodejs.org/dist/$NODE_VERSION/node-$NODE_VERSION-linux-x64.tar.gz" -o node.tar.gz \
+  && tar -xzf node.tar.gz -C /usr/local --strip-components=1 \
+  && rm node.tar.gz
+
+# Install Playwright dependencies. Needed for docs PDF export and docs UI e2e tests.
+RUN npx -y playwright install --with-deps
+
 ##
 # download sundry dependencies
 #
diff --git a/dev-support/flaky-tests/python-requirements.txt b/dev-support/flaky-tests/python-requirements.txt
index 75952d259116..b5e8c120440f 100644
--- a/dev-support/flaky-tests/python-requirements.txt
+++ b/dev-support/flaky-tests/python-requirements.txt
@@ -15,8 +15,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-requests==2.32.4
+requests==2.33.0
 future==0.18.3
-gitpython==3.1.41
+gitpython==3.1.47
 rbtools==4.0
 jinja2==3.1.6
diff --git a/dev-support/flaky-tests/report-flakies.py b/dev-support/flaky-tests/report-flakies.py
index 16096e3344a5..33e9a12f0d5e 100755
--- a/dev-support/flaky-tests/report-flakies.py
+++ b/dev-support/flaky-tests/report-flakies.py
@@ -54,6 +54,11 @@ parser.add_argument('--is-yetus', metavar='True/False', action='append', choices=['True', 'False'],
                     help='True, if build is yetus style i.e. look for maven output in artifacts; '
                          'False, if maven output is in /consoleText itself.')
+parser.add_argument('--excludes-threshold-flakiness', metavar='n', type=float, default=20.0,
+                    required=False, help='Flakiness threshold for adding a test to excludes file')
+parser.add_argument('--excludes-threshold-runs', metavar='n', type=int, default=10,
+                    required=False,
+                    help='The times of a test should run before it can be added to excludes file')
 parser.add_argument(
     "--mvn", action="store_true",
     help="Writes two strings for including/excluding these flaky tests using maven flags. These "
@@ -149,7 +154,6 @@ def expand_multi_config_projects(cli_args):
                 'excludes': excluded_builds, 'is_yetus': is_yetus})
     return final_expanded_urls
 
-
 # Set of timeout/failed tests across all given urls.
 all_timeout_tests = set()
 all_failed_tests = set()
@@ -160,6 +164,8 @@ def expand_multi_config_projects(cli_args):
 # Contains {<url>: [run_ids]}
 # Used for common min/max build ids when generating sparklines.
 url_to_build_ids = OrderedDict()
+all_flaky_results = {}
+
 
 # Iterates over each url, gets test results and prints flaky tests.
 expanded_urls = expand_multi_config_projects(args)
@@ -205,36 +211,46 @@ def expand_multi_config_projects(cli_args):
         bad_tests.update(failed_tests.union(hanging_tests))
 
     # For each bad test, get build ids where it ran, timed out, failed or hanged.
-    test_to_build_ids = {key : {'all' : set(), 'timeout': set(), 'failed': set(),
-                                'hanging' : set(), 'bad_count' : 0}
+    test_to_build_ids = {key: {'all': set(), 'timeout': set(), 'failed': set(),
+                               'hanging': set(), 'bad_count': 0}
                          for key in bad_tests}
+
     for build in build_id_to_results:
         [all_tests, failed_tests, timeout_tests, hanging_tests] = build_id_to_results[build]
-        for bad_test in test_to_build_ids:
+        for bad_test, test_result in test_to_build_ids.items():
             is_bad = False
             if all_tests.issuperset([bad_test]):
-                test_to_build_ids[bad_test]["all"].add(build)
+                test_result["all"].add(build)
             if timeout_tests.issuperset([bad_test]):
-                test_to_build_ids[bad_test]['timeout'].add(build)
+                test_result['timeout'].add(build)
                 is_bad = True
             if failed_tests.issuperset([bad_test]):
-                test_to_build_ids[bad_test]['failed'].add(build)
+                test_result['failed'].add(build)
                 is_bad = True
             if hanging_tests.issuperset([bad_test]):
-                test_to_build_ids[bad_test]['hanging'].add(build)
+                test_result['hanging'].add(build)
                 is_bad = True
             if is_bad:
-                test_to_build_ids[bad_test]['bad_count'] += 1
+                test_result['bad_count'] += 1
 
     # Calculate flakyness % and successful builds for each test. Also sort build ids.
-    for bad_test in test_to_build_ids:
-        test_result = test_to_build_ids[bad_test]
+    for bad_test, test_result in test_to_build_ids.items():
         test_result['flakyness'] = test_result['bad_count'] * 100.0 / len(test_result['all'])
         test_result['success'] = (test_result['all'].difference(
             test_result['failed'].union(test_result['hanging'])))
         for key in ['all', 'timeout', 'failed', 'hanging', 'success']:
             test_result[key] = sorted(test_result[key])
-
+        # record flaky test result
+        # record the one with more runs, or greater flakiness if runs are equal
+        if bad_test not in all_flaky_results:
+            all_flaky_results[bad_test] = {'runs': len(test_result['all']),
+                                           'flakyness': test_result['flakyness']}
+        elif all_flaky_results[bad_test]['runs'] < len(test_result['all']):
+            all_flaky_results[bad_test] = {'runs': len(test_result['all']),
+                                           'flakyness': test_result['flakyness']}
+        elif all_flaky_results[bad_test]['runs'] == len(test_result['all']) and \
+                all_flaky_results[bad_test]['flakyness'] < test_result['flakyness']:
+            all_flaky_results[bad_test]['flakyness'] = test_result['flakyness']
 
     # Sort tests in descending order by flakyness.
     sorted_test_to_build_ids = OrderedDict(
@@ -260,14 +276,21 @@ def expand_multi_config_projects(cli_args):
 print("Builds without any test runs: {}".format(build_ids_without_tests_run))
 print("")
 
-
 all_bad_tests = all_hanging_tests.union(all_failed_tests)
 if args.mvn:
     includes = ",".join(all_bad_tests)
     with open(output_dir + "/includes", "w") as inc_file:
         inc_file.write(includes)
 
-    excludes = ["**/{0}.java".format(bad_test) for bad_test in all_bad_tests]
+    excludes = []
+    for bad_test in all_bad_tests:
+        if bad_test not in all_flaky_results:
+            print(f"No flaky record found for {bad_test}")
+            continue
+        test_result = all_flaky_results[bad_test]
+        if test_result['flakyness'] > args.excludes_threshold_flakiness and \
+                test_result['runs'] >= args.excludes_threshold_runs:
+            excludes.append(f"**/{bad_test}.java")
     with open(output_dir + "/excludes", "w") as exc_file:
         exc_file.write(",".join(excludes))
@@ -283,5 +306,5 @@ def expand_multi_config_projects(cli_args):
 with open(output_dir + "/dashboard.html", "w") as f:
     datetime = time.strftime("%m/%d/%Y %H:%M:%S")
-    f.write(template.render(datetime=datetime, bad_tests_count=len(all_bad_tests),
+    f.write(template.render(datetime=datetime, bad_tests_count=len(bad_tests),
                             results=url_to_bad_test_results, build_ids=url_to_build_ids))
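The exclude-list rule added above is easy to misread inside the diff, so here it is in isolation: a minimal Python sketch of the same filter (the dict shape mirrors all_flaky_results in the script; the sample data and threshold values are illustrative only):

    # Only tests that are both flaky enough and sampled often enough get excluded.
    all_flaky_results = {
        'TestFoo': {'runs': 25, 'flakyness': 40.0},  # excluded: flaky and well sampled
        'TestBar': {'runs': 3, 'flakyness': 66.7},   # kept: too few runs to judge
        'TestBaz': {'runs': 50, 'flakyness': 4.0},   # kept: below flakiness threshold
    }
    threshold_flakiness = 20.0  # --excludes-threshold-flakiness default
    threshold_runs = 10         # --excludes-threshold-runs default

    excludes = [
        '**/{0}.java'.format(name)
        for name, result in all_flaky_results.items()
        if result['flakyness'] > threshold_flakiness and result['runs'] >= threshold_runs
    ]
    print(','.join(excludes))  # -> **/TestFoo.java

Note the asymmetry: flakiness must strictly exceed its threshold, while the run count only has to meet it.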
diff --git a/dev-support/generate-website/build-hbase-website.sh b/dev-support/generate-website/build-hbase-website.sh
new file mode 100755
index 000000000000..4e28ff757de4
--- /dev/null
+++ b/dev-support/generate-website/build-hbase-website.sh
@@ -0,0 +1,250 @@
+#!/bin/bash
+#
+#/**
+# * Licensed to the Apache Software Foundation (ASF) under one
+# * or more contributor license agreements.  See the NOTICE file
+# * distributed with this work for additional information
+# * regarding copyright ownership.  The ASF licenses this file
+# * to you under the Apache License, Version 2.0 (the
+# * "License"); you may not use this file except in compliance
+# * with the License.  You may obtain a copy of the License at
+# *
+# *     http://www.apache.org/licenses/LICENSE-2.0
+# *
+# * Unless required by applicable law or agreed to in writing, software
+# * distributed under the License is distributed on an "AS IS" BASIS,
+# * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# * See the License for the specific language governing permissions and
+# * limitations under the License.
+# */
+
+# This script is meant to run as part of a Jenkins job such as
+# https://builds.apache.org/job/hbase_generate_website/
+
+set -e
+function usage {
+  echo "Usage: ${0} [options] /path/to/hbase/checkout"
+  echo ""
+  echo "    --working-dir /path/to/use        Path for writing logs and a local checkout of hbase-site repo."
+  echo "                                      if given must exist."
+  echo "                                      defaults to making a directory via mktemp."
+  echo "    --local-repo /path/for/maven/.m2  Path for putting local maven repo."
+  echo "                                      if given must exist."
+  echo "                                      defaults to making a clean directory in --working-dir."
+  echo "    --help                            show this usage message."
+  exit 1
+}
+# if no args specified, show usage
+if [ $# -lt 1 ]; then
+  usage
+fi
+
+# Get arguments
+declare component_dir
+declare working_dir
+declare local_repo
+while [ $# -gt 0 ]
+do
+  case "$1" in
+    --working-dir) shift; working_dir=$1; shift;;
+    --local-repo) shift; local_repo=$1; shift;;
+    --) shift; break;;
+    -*) usage ;;
+    *) break;;  # terminate while loop
+  esac
+done
+
+# should still have where component checkout is.
+if [ $# -lt 1 ]; then
+  usage
+fi
+
+MVN="mvn"
+if ! command -v mvn &>/dev/null; then
+  MVN=$MAVEN_HOME/bin/mvn
+fi
+
+component_dir="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
+
+if [ -z "${working_dir}" ]; then
+  echo "[DEBUG] defaulting to creating a directory via mktemp"
+  if ! working_dir="$(mktemp -d -t hbase-generate-website)" ; then
+    echo "Failed to create temporary working directory. Please specify via --working-dir"
+    exit 1
+  fi
+else
+  # absolutes please
+  working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
+  if [ ! -d "${working_dir}" ]; then
+    echo "passed working directory '${working_dir}' must already exist."
+    exit 1
+  fi
+fi
+
+echo "You'll find logs and temp files in ${working_dir}"
+
+if [ -z "${local_repo}" ]; then
+  echo "[DEBUG] defaulting to creating a local repo within '${working_dir}'"
+  local_repo="${working_dir}/.m2/repo"
+  # Nuke the local maven repo each time, to start with a known environment
+  rm -Rf "${local_repo}"
+  mkdir -p "${local_repo}"
+else
+  # absolutes please
+  local_repo="$(cd "$(dirname "${local_repo}")"; pwd)/$(basename "${local_repo}")"
+  if [ ! -d "${local_repo}" ]; then
+    echo "passed directory for storing the maven repo '${local_repo}' must already exist."
+    exit 1
+  fi
+fi
+
+export MAVEN_OPTS="${MAVEN_OPTS} -Dmaven.repo.local=${local_repo}"
+
+# Verify the Maven version
+${MVN} -version
+# Verify the git version
+git --version
+
+cd "${working_dir}"
+
+# Clean any leftover files in case we are reusing the workspace
+rm -Rf -- *.patch *.patch.zip target *.txt hbase-site
+
+# Save and print the SHA we are building
+CURRENT_HBASE_COMMIT="$(cd "${component_dir}" && git rev-parse HEAD)"
+# Fail if it's empty
+if [ -z "${CURRENT_HBASE_COMMIT}" ]; then
+  echo "Got back a blank answer for the current HEAD. failing."
+  exit 1
+fi
+echo "Current HBase commit: $CURRENT_HBASE_COMMIT"
+
+# Clone the hbase-site repo manually so it doesn't trigger spurious
+# commits in Jenkins.
+git clone --depth 1 --branch asf-site https://gitbox.apache.org/repos/asf/hbase-site.git
+
+# Figure out if the commit of the hbase repo has already been built and bail if so.
+declare -i PUSHED
+PUSHED=$(cd hbase-site && git rev-list --grep "${CURRENT_HBASE_COMMIT}" --fixed-strings --count HEAD)
+echo "[DEBUG] hash was found in $PUSHED commits for hbase-site repository."
+
+if [ "${PUSHED}" -ne 0 ]; then
+  echo "$CURRENT_HBASE_COMMIT is already mentioned in the hbase-site commit log. Not building."
+  exit 0
+else
+  echo "$CURRENT_HBASE_COMMIT is not yet mentioned in the hbase-site commit log. Assuming we don't have it yet."
+fi
+
+# Go to the hbase directory so we can build the site
+cd "${component_dir}"
+
+# This will only be set for builds that are triggered by SCM change, not manual builds
+if [ -n "$CHANGE_ID" ]; then
+  echo -n " ($CHANGE_ID - $CHANGE_TITLE)"
+fi
+
+# Build and install HBase, then build the site
+echo "Building HBase"
+# TODO we have to do a local install first because for whatever reason, the maven-javadoc-plugin's
+# forked compile phase requires that test-scoped dependencies be available, which
+# doesn't work since we will not have done a test-compile phase (MJAVADOC-490). the first place this
+# breaks for me is hbase-server trying to find hbase-http:test and hbase-zookeeper:test.
+# But! some sunshine: because we're doing a full install before running site, we can skip all the
+# compiling in the forked executions. We have to do it awkwardly because MJAVADOC-444.
+if ${MVN} \
+    --batch-mode \
+    -Psite-install-step \
+    --errors \
+    --log-file="${working_dir}/hbase-install-log-${CURRENT_HBASE_COMMIT}.txt" \
+    clean install \
+  && ${MVN} site \
+    --batch-mode \
+    -Dscala.skip=true \
+    -Psite-build-step \
+    --errors \
+    --log-file="${working_dir}/hbase-site-log-${CURRENT_HBASE_COMMIT}.txt"; then
+  echo "Successfully built site."
+else
+  status=$?
+  echo "Maven commands to build the site failed. check logs for details ${working_dir}/hbase-*-log-*.txt"
+  exit $status
+fi
+
+# Stage the site
+echo "Staging HBase site"
+${MVN} \
+  --batch-mode \
+  --errors \
+  --log-file="${working_dir}/hbase-stage-log-${CURRENT_HBASE_COMMIT}.txt" \
+  site:stage
+status=$?
+if [ $status -ne 0 ] || [ ! -d target/staging ]; then
+  echo "Failure: mvn site:stage"
+  exit $status
+fi
+
+# Get ready to update the hbase-site repo with the new artifacts
+cd "${working_dir}/hbase-site"
+
+# Remove previously-generated files
+FILES_TO_REMOVE=("hbase-*"
+                 "apidocs"
+                 "devapidocs"
+                 "testapidocs"
+                 "testdevapidocs"
+                 "xref"
+                 "xref-test"
+                 "*book*"
+                 "*.html"
+                 "*.pdf*"
+                 "css"
+                 "js"
+                 "images")
+
+for FILE in "${FILES_TO_REMOVE[@]}"; do
+  if [ -e "${FILE}" ]; then
+    echo "Removing hbase-site/$FILE"
+    rm -Rf "${FILE}"
+  fi
+done
+
+# Copy in the newly-built artifacts
+# First copy documentation from Maven site build
+echo "Copying documentation from target/staging"
+# TODO what do we do when the site build wants to remove something? Can't rsync because e.g. release-specific docs.
+cp -pPR "${component_dir}"/target/staging/* .
+
+# Then copy the new website (landing page) from hbase-website/build/client
+echo "Copying new website from hbase-website/build/client"
+cp -pPR "${component_dir}"/hbase-website/build/client/* .
+
+# If the index.html is missing, bail because this is serious
+if [ ! -f index.html ]; then
+  echo "The index.html is missing. Aborting."
+  exit 1
+fi
+
+echo "Adding all the files we know about"
+git add .
+if [[ -z "$(git status --porcelain)" ]]; then
+  echo "No files to commit, skipping..."
+  exit 0
+fi
+# Create the commit message and commit the changes
+WEBSITE_COMMIT_MSG="Published site at $CURRENT_HBASE_COMMIT."
+echo "WEBSITE_COMMIT_MSG: $WEBSITE_COMMIT_MSG"
+git commit -m "${WEBSITE_COMMIT_MSG}" -a
+# Dump a little report
+echo "This commit changed these files (excluding Modified files):"
+git diff --name-status --diff-filter=ADCRTXUB origin/asf-site | tee "${working_dir}/hbase-file-diff-summary-${CURRENT_HBASE_COMMIT}.txt"
+# Create a patch, which Jenkins can save as an artifact and can be examined for debugging
+git format-patch --stdout origin/asf-site > "${working_dir}/${CURRENT_HBASE_COMMIT}.patch"
+if [ ! -s "${working_dir}/${CURRENT_HBASE_COMMIT}.patch" ]; then
+  echo "Something went wrong when creating the patch of our updated site."
+  exit 1
+fi
+echo "Change set saved to patch ${working_dir}/${CURRENT_HBASE_COMMIT}.patch"
+
+# Zip up the patch so Jenkins can save it
+cd "${working_dir}"
+zip website.patch.zip "${CURRENT_HBASE_COMMIT}.patch"
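The "already published?" guard in the script above hinges on one git invocation: counting asf-site commits whose message mentions the hbase SHA. A rough Python equivalent of just that check (a sketch only; the helper name is made up and it assumes the hbase-site clone already exists):

    import subprocess

    def already_published(hbase_commit, site_repo='hbase-site'):
        """True if any commit message in the asf-site history mentions the SHA."""
        out = subprocess.run(
            ['git', '-C', site_repo, 'rev-list', '--grep', hbase_commit,
             '--fixed-strings', '--count', 'HEAD'],
            check=True, capture_output=True, text=True)
        return int(out.stdout.strip()) != 0

Using --fixed-strings keeps the SHA from being treated as a regex, and --count avoids listing matching commits just to test for existence.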
diff --git a/dev-support/generate-website/generate-hbase-website.Jenkinsfile b/dev-support/generate-website/generate-hbase-website.Jenkinsfile
new file mode 100644
index 000000000000..2fdccf0c4f2e
--- /dev/null
+++ b/dev-support/generate-website/generate-hbase-website.Jenkinsfile
@@ -0,0 +1,137 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+pipeline {
+  agent {
+    node {
+      label 'hbase'
+    }
+  }
+  triggers {
+    pollSCM('@daily')
+  }
+  options {
+    buildDiscarder(logRotator(numToKeepStr: '30'))
+    timeout (time: 1, unit: 'HOURS')
+    timestamps()
+    skipDefaultCheckout()
+    disableConcurrentBuilds()
+  }
+  parameters {
+    booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.')
+    booleanParam(name: 'FORCE_FAIL', defaultValue: false, description: 'force a failure to test notifications.')
+  }
+  stages {
+    stage ('build hbase website') {
+      steps {
+        dir('component') {
+          checkout scm
+        }
+        sh '''#!/bin/bash -e
+          if [ "${DEBUG}" = "true" ]; then
+            set -x
+          fi
+          if [ "${FORCE_FAIL}" = "true" ]; then
+            false
+          fi
+          user=$(whoami)
+          docker build -t hbase-build-website -f "${WORKSPACE}/component/dev-support/docker/Dockerfile" .
+          docker run --rm -v "${WORKSPACE}":/home/${user} -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+            -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" -e GIT_AUTHOR_NAME="HBase" \
+            -e GIT_AUTHOR_EMAIL="dev@hbase.apache.org" -e GIT_COMMITTER_NAME="HBase" \
+            -e GIT_COMMITTER_EMAIL="dev@hbase.apache.org" --workdir=/home/${user} hbase-build-website \
+            "component/dev-support/generate-website/build-hbase-website.sh" \
+            --working-dir /home/${user} component
+        '''
+        script {
+          if (fileExists('website.patch.zip')) {
+            sh '''#!/bin/bash -e
+              patch=$(ls -1 *.patch | head -n 1)
+              echo "Has patch ${patch}, stash and then publish"
+            '''
+            stash name: 'patch', includes: "*.patch"
+            env.PUBLISH_WEBSITE = "true"
+          } else {
+            echo "No patch file, skip stashing and publishing"
+            env.PUBLISH_WEBSITE = "false"
+          }
+        }
+      }
+    }
+    stage('publish hbase website') {
+      agent {
+        node {
+          label 'git-websites'
+        }
+      }
+      when {
+        expression {
+          return env.PUBLISH_WEBSITE == 'true'
+        }
+      }
+      steps {
+        sh '''#!/bin/bash -e
+          # wipe out stale repo and files
+          rm -rf *.patch
+          rm -rf hbase-site
+        '''
+        unstash 'patch'
+        sh '''#!/bin/bash -e
+          git clone --depth 1 --branch asf-site https://gitbox.apache.org/repos/asf/hbase-site.git
+          patch=$(ls -1 *.patch | head -n 1)
+          cd hbase-site;
+          echo "applying ${patch}"
+          git am ../${patch}
+          echo "Publishing changes to remote repo..."
+          if git push origin asf-site; then
+            echo "changes pushed."
+          else
+            echo "Failed to push to asf-site. Website not updated."
+            exit 1
+          fi
+          echo "Sending empty commit to work around INFRA-10751."
+          git commit --allow-empty -m "INFRA-10751 Empty commit"
+          # Push the empty commit
+          if git push origin asf-site; then
+            echo "empty commit pushed."
+          else
+            echo "Failed to push the empty commit to asf-site. Website may not update. Manually push an empty commit to fix this. (See INFRA-10751)"
+            exit 1
+          fi
+          echo "Pushed the changes to branch asf-site. Refresh http://hbase.apache.org/ to see the changes within a few minutes."
+        '''
+      }
+    }
+  }
+  post {
+    always {
+      // Has to be relative to WORKSPACE.
+      archiveArtifacts artifacts: '*.patch.zip,hbase-*.txt'
+    }
+    failure {
+      mail to: 'dev@hbase.apache.org', replyTo: 'dev@hbase.apache.org', subject: "Failure: HBase Generate Website", body: """
+Build status: ${currentBuild.currentResult}
+
+The HBase website has not been updated to incorporate recent HBase changes.
+
+See ${env.BUILD_URL}console
+"""
+    }
+    cleanup {
+      deleteDir()
+    }
+  }
+}
diff --git a/dev-support/gh_hide_old_comments.sh b/dev-support/gh_hide_old_comments.sh
index 61217cfa241b..abba55a39ad2 100755
--- a/dev-support/gh_hide_old_comments.sh
+++ b/dev-support/gh_hide_old_comments.sh
@@ -40,25 +40,79 @@ declare CURL="${CURL:-curl}"
 function fetch_comments {
   local pr="$1"
   local comments_file
+  local page_file
+  local headers_file
   local -a curl_args
-  curl_args=(
-    --fail
-    "${GITHUB_AUTH[@]}"
-    --header 'Accept: application/vnd.github+json'
-    --header 'X-GitHub-Api-Version: 2022-11-28'
-    --request GET
-    --url "${GITHUB_API_URL}/repos/${REPO}/issues/${pr}/comments?per_page=500"
-  )
-  if [ "${DEBUG}" = true ] ; then
-    curl_args+=(--verbose)
-  else
-    curl_args+=(--silent)
-  fi
+  local page=1
+  local next_url
   comments_file="$(mktemp "comments_${pr}" 2>/dev/null || mktemp -t "comments_${pr}.XXXXXXXXXX")" || \
     { >&2 echo 'cannot create temp file'; exit 1 ;}
-  "${CURL}" "${curl_args[@]}" > "${comments_file}"
+  page_file="$(mktemp "page_${pr}" 2>/dev/null || mktemp -t "page_${pr}.XXXXXXXXXX")" || \
+    { >&2 echo 'cannot create temp file'; exit 1 ;}
+  headers_file="$(mktemp "headers_${pr}" 2>/dev/null || mktemp -t "headers_${pr}.XXXXXXXXXX")" || \
+    { >&2 echo 'cannot create temp file'; exit 1 ;}
+
+  # cleanup temp files on error
+  trap 'rm -f "${page_file}" "${headers_file}"; exit 1' ERR
+
+  next_url="${GITHUB_API_URL}/repos/${REPO}/issues/${pr}/comments?per_page=100"
+
+  # start with empty JSON array
+  echo '[]' > "${comments_file}"
+
+  while [ -n "${next_url}" ] ; do
+    curl_args=(
+      --fail
+      --max-time 30
+      "${GITHUB_AUTH[@]}"
+      --header 'Accept: application/vnd.github+json'
+      --header 'X-GitHub-Api-Version: 2022-11-28'
+      --dump-header "${headers_file}"
+      --request GET
+      --url "${next_url}"
+    )
+    if [ "${DEBUG}" = true ] ; then
+      curl_args+=(--verbose)
+      >&2 echo "Fetching page ${page}: ${next_url}"
+    else
+      curl_args+=(--silent)
+    fi
+
+    if ! "${CURL}" "${curl_args[@]}" > "${page_file}"; then
+      >&2 echo "Failed to fetch page ${page}: ${next_url}"
+      rm -f "${page_file}" "${headers_file}"
+      exit 1
+    fi
+
+    if [ "${DEBUG}" = 'true' ] ; then
+      >&2 echo "Page ${page} returned $(jq length "${page_file}") comments"
+    fi
+
+    # merge this page into the accumulated results
+    if ! jq -s '.[0] + .[1]' "${comments_file}" "${page_file}" > "${comments_file}.tmp"; then
+      >&2 echo "Failed to merge comments from page ${page}"
+      rm -f "${page_file}" "${headers_file}" "${comments_file}.tmp"
+      exit 1
+    fi
+    mv "${comments_file}.tmp" "${comments_file}"
+
+    # check for next page in Link header
+    # Link header format: <url>; rel="next", <url>; rel="last"
+    # Extract URL associated with rel="next" regardless of position
+    next_url=""
+    if grep -qi '^link:' "${headers_file}" ; then
+      next_url=$(grep -i '^link:' "${headers_file}" | tr ',' '\n' | grep 'rel="next"' | sed -n 's/.*<\([^>]*\)>.*/\1/p' || true)
+    fi
+
+    page=$((page + 1))
+  done
+
+  rm -f "${page_file}" "${headers_file}"
+  trap - ERR
+
+  if [ "${DEBUG}" = 'true' ] ; then
+    >&2 echo "Total comments fetched: $(jq length "${comments_file}")"
     >&2 cat "${comments_file}"
   fi
   echo "${comments_file}"
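For comparison, the same follow-the-Link-header pagination is compact in a language whose HTTP client parses Link for you; a sketch with Python's requests library (repo, pr, and token are placeholders):

    import requests

    def fetch_all_comments(repo, pr, token):
        """Follow rel="next" Link headers until the last page, accumulating comments."""
        url = 'https://api.github.com/repos/{0}/issues/{1}/comments?per_page=100'.format(repo, pr)
        headers = {'Authorization': 'Bearer ' + token,
                   'Accept': 'application/vnd.github+json',
                   'X-GitHub-Api-Version': '2022-11-28'}
        comments = []
        while url:
            resp = requests.get(url, headers=headers, timeout=30)
            resp.raise_for_status()
            comments.extend(resp.json())
            # requests exposes the parsed Link header as resp.links
            url = resp.links.get('next', {}).get('url')
        return comments

The shell version has to do the equivalent of resp.links by hand, which is why it greps the dumped headers and extracts the URL tagged rel="next".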
@@ -104,11 +158,16 @@ function identify_most_recent_build_number {
   local pr="$1"
   local comments_file="$2"
   local jq_filter
+  local url_pattern="${JOB_NAME}/job/PR-${pr}/(?<buildnum>[0-9]+)/"
+  # GitHub Actions URLs don't have /job/ in them
+  if [[ "${JOB_NAME}" == *"GH-Actions"* ]]; then
+    url_pattern="${JOB_NAME}/PR-${pr}/(?<buildnum>[0-9]+)/"
+  fi
 
   read -r -d '' jq_filter << EOF || :
 .[] \
 | select(.user.id == ${BUILD_BOT_USER_ID}) \
 | .body \
-| capture("${JOB_NAME}/job/PR-${pr}/(?<buildnum>[0-9]+)/") \
+| capture("${url_pattern}") \
 | .buildnum
 EOF
@@ -122,10 +181,15 @@ function identify_old_comment_ids {
   local comments_file="$2"
   local most_recent_build_number="$3"
   local jq_filter
+  local url_pattern="${JOB_NAME}/job/PR-${pr}/(?<buildnum>[0-9]+)/"
+  # GitHub Actions URLs don't have /job/ in them
+  if [[ "${JOB_NAME}" == *"GH-Actions"* ]]; then
+    url_pattern="${JOB_NAME}/PR-${pr}/(?<buildnum>[0-9]+)/"
+  fi
 
   read -r -d '' jq_filter << EOF || :
 .[] \
 | select(.user.id == ${BUILD_BOT_USER_ID}) \
-| { node_id, buildnum: (.body | capture("${JOB_NAME}/job/PR-${pr}/(?<buildnum>[0-9]+)/") | .buildnum | tonumber) } \
+| { node_id, buildnum: (.body | capture("${url_pattern}") | .buildnum | tonumber) } \
 | select(.buildnum < (${most_recent_build_number} | tonumber)) \
 | .node_id
 EOF
diff --git a/dev-support/git-jira-release-audit/requirements.txt b/dev-support/git-jira-release-audit/requirements.txt
index 8eb0eb04305f..7aaaacf23aa5 100644
--- a/dev-support/git-jira-release-audit/requirements.txt
+++ b/dev-support/git-jira-release-audit/requirements.txt
@@ -19,21 +19,21 @@ blessed==1.17.0
 certifi==2024.7.4
 cffi==1.13.2
 chardet==3.0.4
-cryptography==44.0.1
+cryptography==46.0.7
 defusedxml==0.6.0
 enlighten==1.4.0
 gitdb2==2.0.6
-GitPython==3.1.41
+GitPython==3.1.47
 idna==3.7
 jira==2.0.0
 oauthlib==3.1.0
 pbr==5.4.4
 pycparser==2.19
-PyJWT==2.4.0
-requests==2.32.4
+PyJWT==2.12.0
+requests==2.33.0
 requests-oauthlib==1.3.0
 requests-toolbelt==0.9.1
 six==1.14.0
 smmap2==2.0.5
-urllib3==2.5.0
+urllib3==2.6.3
 wcwidth==0.1.8
diff --git a/dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile b/dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile
new file mode 100644
index 000000000000..8e8eff8ded30
--- /dev/null
+++ b/dev-support/hadoop3-backwards-compatibility-check.Jenkinsfile
@@ -0,0 +1,326 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+// Jenkinsfile for Hadoop3 Backwards Compatibility Checks
+// Uses matrix job to parallelize checks across different Hadoop3 versions
+
+pipeline {
+  agent {
+    node {
+      label 'hbase'
+    }
+  }
+  triggers {
+    pollSCM('H H */2 * *')
+  }
+  options {
+    buildDiscarder(logRotator(numToKeepStr: '20'))
+    timeout (time: 8, unit: 'HOURS')
+    timestamps()
+    skipDefaultCheckout()
+    disableConcurrentBuilds()
+  }
+  environment {
+    YETUS_RELEASE = '0.15.0'
+    HADOOP_VERSIONS = "3.2.4,3.3.5,3.3.6,3.4.0,3.4.1,3.4.2"
+  }
+  parameters {
+    booleanParam(name: 'USE_YETUS_PRERELEASE', defaultValue: false, description: '''Check to use the current HEAD of apache/yetus rather than our configured release.
+
+      Should only be used manually when e.g. there is some non-work-aroundable issue in yetus we are checking a fix for.''')
+    booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.')
+  }
+  stages {
+    stage ('scm-checkout') {
+      steps {
+        dir('component') {
+          checkout scm
+        }
+      }
+    }
+    stage ('thirdparty installs') {
+      parallel {
+        stage ('yetus install') {
+          steps {
+            dir('downloads-yetus') {
+              sh '''#!/usr/bin/env bash
+                echo "Make sure we have a directory for downloading dependencies: $(pwd)"
+              '''
+            }
+            sh '''#!/usr/bin/env bash
+              set -e
+              echo "Ensure we have a copy of Apache Yetus."
+              if [[ true != "${USE_YETUS_PRERELEASE}" ]]; then
+                YETUS_DIR="${WORKSPACE}/yetus-${YETUS_RELEASE}"
+                echo "Checking for Yetus ${YETUS_RELEASE} in '${YETUS_DIR}'"
+                if ! "${YETUS_DIR}/bin/test-patch" --version >/dev/null 2>&1 ; then
+                  rm -rf "${YETUS_DIR}"
+                  "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+                    --working-dir "${WORKSPACE}/downloads-yetus" \
+                    --keys 'https://downloads.apache.org/yetus/KEYS' \
+                    --verify-tar-gz \
+                    "${WORKSPACE}/yetus-${YETUS_RELEASE}-bin.tar.gz" \
+                    "yetus/${YETUS_RELEASE}/apache-yetus-${YETUS_RELEASE}-bin.tar.gz"
+                  mv "yetus-${YETUS_RELEASE}-bin.tar.gz" yetus.tar.gz
+                else
+                  echo "Reusing cached install of Apache Yetus version ${YETUS_RELEASE}."
+                fi
+              else
+                YETUS_DIR="${WORKSPACE}/yetus-git"
+                rm -rf "${YETUS_DIR}"
+                echo "downloading from github"
+                curl -L --fail https://api.github.com/repos/apache/yetus/tarball/HEAD -o yetus.tar.gz
+              fi
+              if [ ! -d "${YETUS_DIR}" ]; then
+                echo "unpacking yetus into '${YETUS_DIR}'"
+                mkdir -p "${YETUS_DIR}"
+                gunzip -c yetus.tar.gz | tar xpf - -C "${YETUS_DIR}" --strip-components 1
+              fi
+            '''
+            stash name: 'yetus', includes: "yetus-*/*,yetus-*/**/*"
+          }
+        }
+      }
+    }
+    stage ('backwards compatibility checks') {
+      matrix {
+        axes {
+          axis {
+            name 'HADOOP3_VERSION'
+            values '3.2.4', '3.3.5', '3.3.6', '3.4.0', '3.4.1', '3.4.2'
+          }
+        }
+        agent {
+          node {
+            label 'hbase'
+          }
+        }
+        when {
+          expression {
+            if (HADOOP3_VERSION == '3.2.4') {
+              // only branch-2.5 need to run against hadoop 3.2.4, here we also includes
+              // HBASE-XXXXX-branch-2.5 feature branch
+              return env.BRANCH_NAME.contains('branch-2.5')
+            }
+            return true
+          }
+        }
+        environment {
+          PROJECT = 'hbase'
+          BASEDIR = "${WORKSPACE}/component"
+          PERSONALITY_FILE = "${BASEDIR}/dev-support/hbase-personality.sh"
+          TESTS_FILTER = 'checkstyle,javac,javadoc,pylint,shellcheck,shelldocs,blanks,perlcritic,ruby-lint,rubocop'
+          EXCLUDE_TESTS_URL = "${JENKINS_URL}/job/HBase-Find-Flaky-Tests/job/${BRANCH_NAME}/lastSuccessfulBuild/artifact/output/excludes"
+          ASF_NIGHTLIES = 'https://nightlies.apache.org'
+          ASF_NIGHTLIES_BASE_ORI = "${ASF_NIGHTLIES}/hbase/${JOB_NAME}/${BUILD_NUMBER}"
+          ASF_NIGHTLIES_BASE = "${ASF_NIGHTLIES_BASE_ORI.replaceAll(' ', '%20')}"
+          TESTS = 'compile,htmlout,javac,maven,mvninstall,shadedjars,unit'
+          SET_JAVA_HOME = "/usr/lib/jvm/java-17"
+          HADOOP_PROFILE = '3.0'
+          TEST_PROFILE = 'runDevTests'
+          SKIP_ERRORPRONE = true
+          OUTPUT_DIR_RELATIVE = "output-jdk17-hadoop3-backwards-${HADOOP3_VERSION}"
+          OUTPUT_DIR = "${WORKSPACE}/${OUTPUT_DIR_RELATIVE}"
+          AUTHOR_IGNORE_LIST = 'src/main/asciidoc/_chapters/developer.adoc'
+          BLANKS_EOL_IGNORE_FILE = 'dev-support/blanks-eol-ignore.txt'
+          BLANKS_TABS_IGNORE_FILE = 'dev-support/blanks-tabs-ignore.txt'
+          // output from surefire; sadly the archive function in yetus only works on file names.
+          ARCHIVE_PATTERN_LIST = 'TEST-*.xml,org.apache.h*.txt,*.dumpstream,*.dump'
+        }
+        stages {
+          stage ('run checks') {
+            steps {
+              sh '''#!/usr/bin/env bash
+                set -e
+                rm -rf "${OUTPUT_DIR}" && mkdir "${OUTPUT_DIR}"
+                rm -f "${OUTPUT_DIR}/commentfile"
+              '''
+              unstash 'yetus'
+              dir('component') {
+                checkout scm
+              }
+              sh '''#!/usr/bin/env bash
+                set -e
+                rm -rf "${OUTPUT_DIR}/machine" && mkdir "${OUTPUT_DIR}/machine"
+                "${BASEDIR}/dev-support/gather_machine_environment.sh" "${OUTPUT_DIR_RELATIVE}/machine"
+                echo "got the following saved stats in '${OUTPUT_DIR_RELATIVE}/machine'"
+                ls -lh "${OUTPUT_DIR_RELATIVE}/machine"
+              '''
+              script {
+                def ret = sh(
+                  returnStatus: true,
+                  script: '''#!/usr/bin/env bash
+                    set -e
+                    declare -i status=0
+                    if "${BASEDIR}/dev-support/hbase_nightly_yetus.sh" ; then
+                      echo "(/) {color:green}+1 jdk17 hadoop ${HADOOP3_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile"
+                    else
+                      echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP3_VERSION} backward compatibility checks{color}" > "${OUTPUT_DIR}/commentfile"
+                      status=1
+                    fi
+                    echo "-- For more information [see jdk17 report|${BUILD_URL}console]" >> "${OUTPUT_DIR}/commentfile"
+                    exit "${status}"
+                  '''
+                )
+                if (ret != 0) {
+                  currentBuild.result = 'UNSTABLE'
+                }
+              }
+            }
+          }
+        }
+        post {
+          always {
+            script {
+              stash name: "jdk17-hadoop3-backwards-result-${HADOOP3_VERSION}", includes: "${OUTPUT_DIR_RELATIVE}/commentfile"
+              junit testResults: "${env.OUTPUT_DIR_RELATIVE}/**/target/**/TEST-*.xml", allowEmptyResults: true
+              // zip surefire reports.
+              sh '''#!/bin/bash -e
+                if [ ! -f "${OUTPUT_DIR}/commentfile" ]; then
+                  echo "(x) {color:red}-1 jdk17 hadoop ${HADOOP3_VERSION} backward compatibility checks{color}" >"${OUTPUT_DIR}/commentfile"
+                  echo "-- Something went wrong running this stage, please [check relevant console output|${BUILD_URL}/console]." >> "${OUTPUT_DIR}/commentfile"
+                fi
+                if [ -d "${OUTPUT_DIR}/archiver" ]; then
+                  count=$(find "${OUTPUT_DIR}/archiver" -type f | wc -l)
+                  if [[ 0 -ne ${count} ]]; then
+                    echo "zipping ${count} archived files"
+                    zip -q -m -r "${OUTPUT_DIR}/test_logs.zip" "${OUTPUT_DIR}/archiver"
+                  else
+                    echo "No archived files, skipping compressing."
+                  fi
+                else
+                  echo "No archiver directory, skipping compressing."
+                fi
+              '''
+              def logFile = "${env.OUTPUT_DIR_RELATIVE}/test_logs.zip"
+              if (fileExists(logFile)) {
+                sshPublisher(publishers: [
+                  sshPublisherDesc(configName: 'Nightlies',
+                    transfers: [
+                      sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
+                        sourceFiles: "${env.OUTPUT_DIR_RELATIVE}/test_logs.zip"
+                      )
+                    ]
+                  )
+                ])
+                sh '''#!/bin/bash -e
+                  echo "Remove ${OUTPUT_DIR}/test_logs.zip for saving space"
+                  rm -rf "${OUTPUT_DIR}/test_logs.zip"
+                  python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/${OUTPUT_DIR_RELATIVE}" > "${OUTPUT_DIR}/test_logs.html"
+                '''
+              }
+              archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/*"
+              archiveArtifacts artifacts: "${env.OUTPUT_DIR_RELATIVE}/**/*"
+              publishHTML target: [
+                allowMissing: true,
+                keepAll: true,
+                alwaysLinkToLastBuild: true,
+                reportDir: "${env.OUTPUT_DIR_RELATIVE}",
+                reportFiles: 'console-report.html',
+                reportName: "JDK17 Nightly Build Report (Hadoop ${HADOOP3_VERSION} backwards compatibility)"
+              ]
+            } // script
+          } // always
+        } // post
+      } // matrix
+    } // stage ('backwards compatibility checks')
+  } // stages
+  post {
+    always {
+      script {
+        sh "printenv"
+        // wipe out all the output directories before unstashing
+        sh '''
+          echo "Clean up result directories"
+          rm -rf output-jdk17-hadoop3-backwards-*
+        '''
+        def results = []
+        for (hadoopVersion in getHadoopVersions(env.HADOOP_VERSIONS)) {
+          try {
+            unstash "jdk17-hadoop3-backwards-result-${hadoopVersion}"
+            results.add("output-jdk17-hadoop3-backwards-${hadoopVersion}/commentfile")
+          } catch (e) {
+            echo "unstash ${hadoopVersion} failed, ignore"
+          }
+        }
+        try {
+          def comment = "Results for branch ${env.BRANCH_NAME}\n"
+          comment += "\t[build ${currentBuild.displayName} on builds.a.o|${env.BUILD_URL}]: "
+          if (currentBuild.result == null || currentBuild.result == "SUCCESS") {
+            comment += "(/) *{color:green}+1 overall{color}*\n"
+          } else {
+            comment += "(x) *{color:red}-1 overall{color}*\n"
+          }
+          comment += "----\n"
+          comment += "Backwards compatibility checks:\n"
+          comment += results.collect { fileExists(file: it) ? readFile(file: it) : "" }.join("\n\n")
+
+          echo "[INFO] Comment:"
+          echo comment
+
+          def jiras = getJirasToComment(env.BRANCH_NAME, [])
+          if (jiras.isEmpty()) {
+            echo "[DEBUG] non-feature branch, checking change messages for jira keys."
+            jiras = getJirasToCommentFromChangesets(currentBuild)
+          }
+          jiras.each { currentIssue ->
+            jiraComment issueKey: currentIssue, body: comment
+          }
+        } catch (Exception exception) {
+          echo "Got exception: ${exception}"
+          echo " ${exception.getStackTrace()}"
+        }
+      }
+    }
+  }
+}
+
+@NonCPS
+List getHadoopVersions(String versions) {
+  return versions.split(',').collect { it.trim() }.findAll { it } as String[]
+}
+
+import org.jenkinsci.plugins.workflow.support.steps.build.RunWrapper
+@NonCPS
+List getJirasToCommentFromChangesets(RunWrapper thisBuild) {
+  def seenJiras = []
+  thisBuild.changeSets.each { cs ->
+    cs.getItems().each { change ->
+      CharSequence msg = change.msg
+      echo "change: ${change}"
+      echo "     ${msg}"
+      echo "     ${change.commitId}"
+      echo "     ${change.author}"
+      seenJiras = getJirasToComment(msg, seenJiras)
+    }
+  }
+  return seenJiras
+}
+
+@NonCPS
+List getJirasToComment(CharSequence source, List seen) {
+  source.eachMatch("HBASE-[0-9]+") { currentIssue ->
+    echo "[DEBUG] found jira key: ${currentIssue}"
+    if (currentIssue in seen) {
+      echo "[DEBUG] already commented on ${currentIssue}."
+    } else {
+      echo "[INFO] commenting on ${currentIssue}."
+      seen << currentIssue
+    }
+  }
+  return seen
+}
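getJirasToComment above is, at heart, a de-duplicating regex scan over text for issue keys; the same idea in a few lines of Python (illustrative only, not part of the patch):

    import re

    JIRA_KEY = re.compile(r'HBASE-[0-9]+')

    def jiras_to_comment(messages):
        """Collect distinct HBASE-NNNN keys in first-seen order."""
        seen = []
        for msg in messages:
            for key in JIRA_KEY.findall(msg):
                if key not in seen:
                    seen.append(key)
        return seen

    # jiras_to_comment(['HBASE-12345 fix row cache', 'HBASE-12345 addendum'])
    # -> ['HBASE-12345']

Keeping first-seen order matters because the Jenkinsfile comments on each issue exactly once, in the order the changesets mention them.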
diff --git a/dev-support/hbase-personality.sh b/dev-support/hbase-personality.sh
index 9a5d34cc2138..577f7c77deb9 100755
--- a/dev-support/hbase-personality.sh
+++ b/dev-support/hbase-personality.sh
@@ -298,30 +298,16 @@ function personality_file_tests
   local filename=$1
   yetus_debug "HBase specific personality_file_tests"
 
-  # If the change is to the refguide, then we don't need any builtin yetus tests
-  # the refguide test (below) will suffice for coverage.
-  if [[ ${filename} =~ src/main/asciidoc ]] ||
-     [[ ${filename} =~ src/main/xslt ]]; then
-    yetus_debug "Skipping builtin yetus checks for ${filename}. refguide test should pick it up."
-  else
-    # If we change our asciidoc, rebuild mvnsite
-    if [[ ${BUILDTOOL} = maven ]]; then
-      if [[ ${filename} =~ src/site || ${filename} =~ src/main/asciidoc ]]; then
-        yetus_debug "tests/mvnsite: ${filename}"
-        add_test mvnsite
-      fi
-    fi
-    # If we change checkstyle configs, run checkstyle
-    if [[ ${filename} =~ checkstyle.*\.xml ]]; then
-      yetus_debug "tests/checkstyle: ${filename}"
-      add_test checkstyle
-    fi
-    # fallback to checking which tests based on what yetus would do by default
-    if declare -f "${BUILDTOOL}_builtin_personality_file_tests" >/dev/null; then
-      "${BUILDTOOL}_builtin_personality_file_tests" "${filename}"
-    elif declare -f builtin_personality_file_tests >/dev/null; then
-      builtin_personality_file_tests "${filename}"
-    fi
+  # If we change checkstyle configs, run checkstyle
+  if [[ ${filename} =~ checkstyle.*\.xml ]]; then
+    yetus_debug "tests/checkstyle: ${filename}"
+    add_test checkstyle
+  fi
+  # fallback to checking which tests based on what yetus would do by default
+  if declare -f "${BUILDTOOL}_builtin_personality_file_tests" >/dev/null; then
+    "${BUILDTOOL}_builtin_personality_file_tests" "${filename}"
+  elif declare -f builtin_personality_file_tests >/dev/null; then
+    builtin_personality_file_tests "${filename}"
   fi
 }
 
@@ -330,6 +316,9 @@ function personality_file_tests
 ## @audience     private
 ## @stability    evolving
 ## @param        name of variable to set with maven arguments
+# NOTE: INCLUDE_TESTS_URL uses -Dtest= which conflicts with pom.xml patterns.
+# Do not use INCLUDE_TESTS_URL with profiles that define their own patterns
+# (e.g., runLargeTests-wave1, runLargeTests-wave2, runLargeTests-wave3).
 function get_include_exclude_tests_arg
 {
   local  __resultvar=$1
@@ -397,8 +386,7 @@ function refguide_filefilter
 
   # we only generate ref guide on master branch now
   if [[ "${PATCH_BRANCH}" = master ]]; then
-    if [[ ${filename} =~ src/main/asciidoc ]] ||
-       [[ ${filename} =~ src/main/xslt ]] ||
+    if [[ ${filename} =~ hbase-website ]] ||
        [[ ${filename} =~ hbase-common/src/main/resources/hbase-default\.xml ]]; then
       add_test refguide
     fi
@@ -424,8 +412,8 @@ function refguide_rebuild
   # shellcheck disable=2046
   echo_and_redirect "${logfile}" \
     $(maven_executor) clean site --batch-mode \
-      -pl . \
-      -Dtest=NoUnitTests -DHBasePatchProcess -Prelease \
+      -pl hbase-website \
+      -DskipTests -DHBasePatchProcess -Prelease \
       -Dmaven.javadoc.skip=true -Dcheckstyle.skip=true -Dspotbugs.skip=true
 
   count=$(${GREP} -c '\[ERROR\]' "${logfile}")
@@ -435,31 +423,37 @@ function refguide_rebuild
     return 1
   fi
 
-  if ! mv target/site "${PATCH_DIR}/${repostatus}-site"; then
+  if ! mv hbase-website/build/client "${PATCH_DIR}/${repostatus}-site"; then
    add_vote_table -1 refguide "${repostatus} failed to produce a site directory."
    add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
    return 1
  fi
 
-  if [[ ! -f "${PATCH_DIR}/${repostatus}-site/book.html" ]]; then
+  if [[ ! -f "${PATCH_DIR}/${repostatus}-site/index.html" ]]; then
    add_vote_table -1 refguide "${repostatus} failed to produce the html version of the reference guide."
    add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
    return 1
  fi
 
-  pdf_output="apache_hbase_reference_guide.pdf"
+  pdf_output="apache-hbase-reference-guide.pdf"
 
-  if [[ ! -f "${PATCH_DIR}/${repostatus}-site/${pdf_output}" ]]; then
+  if ! mv "hbase-website/public/books/${pdf_output}" "${PATCH_DIR}/${repostatus}-site"; then
    add_vote_table -1 refguide "${repostatus} failed to produce the pdf version of the reference guide."
    add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
    return 1
  fi
 
+  if [[ ! -f "${PATCH_DIR}/${repostatus}-site/${pdf_output}" ]]; then
+    add_vote_table -1 refguide "${repostatus} failed to verify the pdf version of the reference guide."
+    add_footer_table refguide "@@BASE@@/${repostatus}-refguide.log"
+    return 1
+  fi
+
   add_vote_table 0 refguide "${repostatus} has no errors when building the reference guide. See footer for rendered docs, which you should manually inspect."
   if [[ -n "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}" ]]; then
-    add_footer_table refguide "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/${repostatus}-site/book.html"
+    add_footer_table refguide "${ASF_NIGHTLIES_GENERAL_CHECK_BASE}/${repostatus}-site/index.html"
   else
-    add_footer_table refguide "@@BASE@@/${repostatus}-site/book.html"
+    add_footer_table refguide "@@BASE@@/${repostatus}-site/index.html"
   fi
   return 0
 }
@@ -612,17 +606,17 @@ function hadoopcheck_rebuild
     # TODO remove this on non 2.5 branches ?
     yetus_info "Setting Hadoop 3 versions to test based on branch-2.5 rules"
     if [[ "${QUICK_HADOOPCHECK}" == "true" ]]; then
-      hbase_hadoop3_versions="3.2.4 3.3.6 3.4.1"
+      hbase_hadoop3_versions="3.2.4 3.3.6 3.4.2"
     else
-      hbase_hadoop3_versions="3.2.3 3.2.4 3.3.2 3.3.3 3.3.4 3.3.5 3.3.6 3.4.0 3.4.1"
+      hbase_hadoop3_versions="3.2.3 3.2.4 3.3.2 3.3.3 3.3.4 3.3.5 3.3.6 3.4.0 3.4.1 3.4.2"
     fi
   else
     yetus_info "Setting Hadoop 3 versions to test based on branch-2.6+/master/feature branch rules"
     # Isn't runnung these tests with the default Hadoop version redundant ?
     if [[ "${QUICK_HADOOPCHECK}" == "true" ]]; then
-      hbase_hadoop3_versions="3.3.6 3.4.1"
+      hbase_hadoop3_versions="3.3.6 3.4.2"
     else
-      hbase_hadoop3_versions="3.3.5 3.3.6 3.4.0 3.4.1"
+      hbase_hadoop3_versions="3.3.5 3.3.6 3.4.0 3.4.1 3.4.2"
     fi
   fi
 
@@ -852,6 +846,7 @@ function spotless_rebuild
 {
   local repostatus=$1
   local logfile="${PATCH_DIR}/${repostatus}-spotless.txt"
+  local linecommentsfile="${PATCH_DIR}/${repostatus}-spotless-linecomments.txt"
 
   if ! verify_needed_test spotless; then
     return 0
@@ -869,12 +864,27 @@ function spotless_rebuild
   count=$(${GREP} -c '\[ERROR\]' "${logfile}")
   if [[ ${count} -gt 0 ]]; then
-    add_vote_table -1 spotless "${repostatus} has ${count} errors when running spotless:check, run spotless:apply to fix."
-    add_footer_table spotless "@@BASE@@/${repostatus}-spotless.txt"
+    # Generate file-level annotations for GitHub Actions
+    if [[ -n "${BUGLINECOMMENTS}" ]]; then
+      # Extract files with violations: lines like "[ERROR] src/path/to/file.java"
+      # with leading whitespace after [ERROR]
+      ${GREP} '^\[ERROR\][[:space:]]\+[^[:space:]]' "${logfile}" \
+        | ${SED} 's/^\[ERROR\][[:space:]]*//g' \
+        | while read -r file; do
+            echo "${file}:1:Spotless formatting required, run mvn spotless:apply"
+          done > "${linecommentsfile}"
+      if [[ -s "${linecommentsfile}" ]]; then
+        bugsystem_linecomments_queue spotless "${linecommentsfile}"
+      fi
+    fi
+
+    add_vote_table_v2 -1 spotless \
+      "@@BASE@@/${repostatus}-spotless.txt" \
+      "${repostatus} has ${count} errors when running spotless:check, run spotless:apply to fix."
     return 1
   fi
 
-  add_vote_table +1 spotless "${repostatus} has no errors when running spotless:check."
+  add_vote_table_v2 +1 spotless "" "${repostatus} has no errors when running spotless:check."
   return 0
 }
diff --git a/dev-support/hbase-vote.sh b/dev-support/hbase-vote.sh
index abaa437fd750..3bab4f9b813f 100755
--- a/dev-support/hbase-vote.sh
+++ b/dev-support/hbase-vote.sh
@@ -149,8 +149,8 @@ function verify_checksums() {
 }
 
 function unzip_from_source() {
-  tar -zxvf hbase-"${HBASE_VERSION}"-src.tar.gz
-  cd hbase-"${HBASE_VERSION}"
+  tar -zxvf *-src.tar.gz
+  cd "$(tar -tzf *-src.tar.gz | head -1 | cut -d/ -f1)"
 }
 
 function rat_test() {
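The unzip_from_source change above stops hard-coding the version: instead of assuming hbase-${HBASE_VERSION}, it reads the top-level directory out of the tarball's own listing. The same trick with Python's tarfile module (a sketch; it assumes exactly one *-src.tar.gz in the working directory):

    import glob
    import tarfile

    def unpack_source_tarball():
        """Extract the source tarball and return its top-level directory name."""
        (tgz,) = glob.glob('*-src.tar.gz')  # assumption: exactly one source tarball
        with tarfile.open(tgz, 'r:gz') as tar:
            top = tar.getnames()[0].split('/', 1)[0]
            tar.extractall()
        return top

This keeps hbase-vote.sh working for artifacts whose directory name does not match the plain hbase-<version> pattern (for example the hadoop3-suffixed tarballs).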
diff --git a/dev-support/integration-test/integration-test.Jenkinsfile b/dev-support/integration-test/integration-test.Jenkinsfile
new file mode 100644
index 000000000000..b17025ab6425
--- /dev/null
+++ b/dev-support/integration-test/integration-test.Jenkinsfile
@@ -0,0 +1,404 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+pipeline {
+  agent {
+    node {
+      label 'hbase'
+    }
+  }
+  triggers {
+    pollSCM('@daily')
+  }
+  options {
+    buildDiscarder(logRotator(numToKeepStr: '20'))
+    timeout (time: 16, unit: 'HOURS')
+    timestamps()
+    skipDefaultCheckout()
+    disableConcurrentBuilds()
+  }
+  environment {
+    HADOOP_VERSIONS = "2.10.2,3.2.4,3.3.5,3.3.6,3.4.0,3.4.1,3.4.2,3.4.3"
+    BASEDIR = "${env.WORKSPACE}/component"
+  }
+  parameters {
+    booleanParam(name: 'DEBUG', defaultValue: false, description: 'Produce a lot more meta-information.')
+  }
+  stages {
+    stage('scm-checkout') {
+      steps {
+        dir('component') {
+          checkout scm
+        }
+      }
+    }
+    // This is meant to mimic what a release manager will do to create RCs.
+    // See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate
+    // TODO (HBASE-23870): replace this with invocation of the release tool
+    stage ('packaging test') {
+      steps {
+        sh '''#!/bin/bash -e
+          echo "Setting up directories"
+          rm -rf "output-srctarball" && mkdir "output-srctarball"
+          rm -rf "unpacked_src_tarball" && mkdir "unpacked_src_tarball"
+          rm -rf ".m2-for-repo" && mkdir ".m2-for-repo"
+          rm -rf ".m2-for-src" && mkdir ".m2-for-src"
+        '''
+        sh '''#!/bin/bash -e
+          rm -rf "output-srctarball/machine" && mkdir "output-srctarball/machine"
+          "${BASEDIR}/dev-support/gather_machine_environment.sh" "output-srctarball/machine"
+          echo "got the following saved stats in 'output-srctarball/machine'"
+          ls -lh "output-srctarball/machine"
+        '''
+        sh '''#!/bin/bash -e
+          echo "Checking the steps for an RM to make a source artifact, then a binary artifact."
+          docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
+          docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+            -u `id -u`:`id -g` -e JAVA_HOME="/usr/lib/jvm/java-17" --workdir=/hbase hbase-integration-test \
+            "component/dev-support/integration-test/source-artifact.sh" \
+            --intermediate-file-dir output-srctarball \
+            --unpack-temp-dir unpacked_src_tarball \
+            --maven-m2-initial .m2-for-repo \
+            --maven-m2-src-build .m2-for-src \
+            --clean-source-checkout \
+            component
+          if [ $? -eq 0 ]; then
+            echo '(/) {color:green}+1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile
+          else
+            echo '(x) {color:red}-1 source release artifact{color}\n-- See build output for details.' >output-srctarball/commentfile
+            exit 1
+          fi
+        '''
+        echo "make sure we have proper hbase tarballs under hbase-assembly"
+        sh '''#!/bin/bash -e
+          if [ 2 -ne $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v hadoop3 | wc -l) ]; then
+            echo '(x) {color:red}-1 testing binary artifact{color}\n-- source tarball did not produce the expected binaries.' >>output-srctarball/commentfile
+            exit 1
+          fi
+          if [[ "${BRANCH_NAME}" == *"branch-2"* ]]; then
+            if [ 2 -ne $(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | wc -l) ]; then
+              echo '(x) {color:red}-1 testing binary artifact{color}\n-- source tarball did not produce the expected hadoop3 binaries.'
>>output-srctarball/commentfile
+              exit 1
+            fi
+          fi
+        '''
+        stash name: 'hbase-install', includes: "unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz"
+      } // steps
+      post {
+        always {
+          script {
+            def srcFile = "${env.WORKSPACE}/output-srctarball/hbase-src.tar.gz"
+            if (fileExists(srcFile)) {
+              echo "upload hbase-src.tar.gz to nightlies"
+              sshPublisher(publishers: [
+                sshPublisherDesc(configName: 'Nightlies',
+                  transfers: [
+                    sshTransfer(remoteDirectory: "hbase/${JOB_NAME}/${BUILD_NUMBER}",
+                      sourceFiles: srcFile
+                    )
+                  ]
+                )
+              ])
+              // remove the big src tarball, store the nightlies url in hbase-src.html
+              sh '''#!/bin/bash -e
+                SRC_TAR="${WORKSPACE}/output-srctarball/hbase-src.tar.gz"
+                echo "Remove ${SRC_TAR} for saving space"
+                rm -rf "${SRC_TAR}"
+                python3 ${BASEDIR}/dev-support/gen_redirect_html.py "${ASF_NIGHTLIES_BASE}/output-srctarball" > "${WORKSPACE}/output-srctarball/hbase-src.html"
+              '''
+            }
+          }
+          archiveArtifacts artifacts: 'output-srctarball/*'
+          archiveArtifacts artifacts: 'output-srctarball/**/*'
+        }
+      }
+    } // packaging test
+    stage ('integration test matrix') {
+      matrix {
+        agent {
+          node {
+            label 'hbase'
+          }
+        }
+        axes {
+          axis {
+            name 'HADOOP_VERSION'
+            // matrix does not support dynamic axis values, so here we need to keep aligned with the
+            // above environment
+            values "2.10.2","3.2.4","3.3.5","3.3.6","3.4.0","3.4.1","3.4.2","3.4.3"
+          }
+        }
+        environment {
+          BASEDIR = "${env.WORKSPACE}/component"
+          OUTPUT_DIR = "output-integration-hadoop-${env.HADOOP_VERSION}"
+        }
+        when {
+          expression {
+            if (HADOOP_VERSION == '2.10.2') {
+              // only branch-2/branch-2.x need to run against hadoop2, here we also includes
+              // HBASE-XXXXX-branch-2 feature branch
+              return env.BRANCH_NAME.contains('branch-2')
+            }
+            if (HADOOP_VERSION == '3.2.4') {
+              // only branch-2.5 need to run against hadoop 3.2.4, here we also includes
+              // HBASE-XXXXX-branch-2.5 feature branch
+              return env.BRANCH_NAME.contains('branch-2.5')
+            }
+            return true
+          }
+        }
+        stages {
+          stage('scm-checkout') {
+            steps {
+              sh '''#!/bin/bash -e
+                echo "Setting up directories"
+                rm -rf "${OUTPUT_DIR}" && mkdir "${OUTPUT_DIR}"
+                echo "(x) {color:red}-1 client integration test for ${HADOOP_VERSION}{color}\n-- Something went wrong with this stage, [check relevant console output|${BUILD_URL}/console]." >${OUTPUT_DIR}/commentfile
>${OUTPUT_DIR}/commentfile
+                rm -rf "unpacked_src_tarball"
+                rm -rf "hbase-install" && mkdir "hbase-install"
+                rm -rf "hbase-client" && mkdir "hbase-client"
+                rm -rf "hadoop-install" && mkdir "hadoop-install"
+                rm -rf "hbase-hadoop3-install"
+                rm -rf "hbase-hadoop3-client"
+                # remove old hadoop tarballs in workspace
+                rm -rf hadoop-*.tar.gz
+              '''
+              dir('component') {
+                checkout scm
+              }
+            } // steps
+          } // scm-checkout
+          stage('install hadoop') {
+            steps {
+              dir("downloads-hadoop") {
+                sh '''#!/bin/bash -e
+                  echo "Make sure we have a directory for downloading dependencies: $(pwd)"
+                '''
+                sh '''#!/bin/bash -e
+                  echo "Ensure we have a copy of Hadoop ${HADOOP_VERSION}"
+                  "${WORKSPACE}/component/dev-support/jenkins-scripts/cache-apache-project-artifact.sh" \
+                      --working-dir "${WORKSPACE}/downloads-hadoop" \
+                      --keys 'https://downloads.apache.org/hadoop/common/KEYS' \
+                      --verify-tar-gz \
+                      "${WORKSPACE}/hadoop-${HADOOP_VERSION}-bin.tar.gz" \
+                      "hadoop/common/hadoop-${HADOOP_VERSION}/hadoop-${HADOOP_VERSION}.tar.gz"
+                  for stale in $(ls -1 "${WORKSPACE}"/hadoop-*.tar.gz | grep -v ${HADOOP_VERSION}); do
+                    echo "Delete stale hadoop cache ${stale}"
+                    rm -rf $stale
+                  done
+                  artifact=$(ls -1 "${WORKSPACE}"/hadoop-${HADOOP_VERSION}-bin.tar.gz | head -n 1)
+                  tar --strip-components=1 -xzf "${artifact}" -C "${WORKSPACE}/hadoop-install"
+                  if [[ ${HADOOP_VERSION} == 3.* ]]; then
+                    # we need to patch some files, otherwise the minicluster will fail to start, see MAPREDUCE-7471
+                    ${BASEDIR}/dev-support/integration-test/patch-hadoop3.sh "${WORKSPACE}/hadoop-install"
+                  fi
+                '''
+              } // dir
+            } // steps
+          } // install hadoop
+          stage('install hbase') {
+            steps {
+              unstash 'hbase-install'
+              sh '''#!/bin/bash -e
+                install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-bin.tar.gz | grep -v client-bin | grep -v hadoop3)
+                tar --strip-components=1 -xzf "${install_artifact}" -C "hbase-install"
+                client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-client-bin.tar.gz | grep -v hadoop3)
+                tar --strip-components=1 -xzf "${client_artifact}" -C "hbase-client"
+                if ls "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz &>/dev/null; then
+                  echo "hadoop3 artifacts available, unpacking the hbase hadoop3 bin tarball into 'hbase-hadoop3-install' and the client hadoop3 tarball into 'hbase-hadoop3-client'"
+                  mkdir hbase-hadoop3-install
+                  mkdir hbase-hadoop3-client
+                  hadoop3_install_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-bin.tar.gz | grep -v client-bin)
+                  tar --strip-components=1 -xzf "${hadoop3_install_artifact}" -C "hbase-hadoop3-install"
+                  hadoop3_client_artifact=$(ls -1 "${WORKSPACE}"/unpacked_src_tarball/hbase-assembly/target/hbase-*-hadoop3-*-client-bin.tar.gz)
+                  tar --strip-components=1 -xzf "${hadoop3_client_artifact}" -C "hbase-hadoop3-client"
+                fi
+              '''
+            } // steps
+          }
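+          // The stage below pairs a JDK with each Hadoop line: Hadoop 2.x still runs on
+          // /usr/lib/jvm/java-8 with no extra HADOOP_OPTS, while Hadoop 3.x runs on
+          // /usr/lib/jvm/java-17 and needs --add-opens java.base/java.lang=ALL-UNNAMED
+          // for the minicluster to start.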
+          stage('integration test') {
+            steps {
+              sh '''#!/bin/bash -e
+                hbase_install_dir="hbase-install"
+                hbase_client_dir="hbase-client"
+                if [[ ${HADOOP_VERSION} == 3.* ]] && [[ -d "hbase-hadoop3-install" ]]; then
+                  echo "run hadoop3 client integration test against hbase hadoop3 binaries"
+                  hbase_install_dir="hbase-hadoop3-install"
+                  hbase_client_dir="hbase-hadoop3-client"
+                fi
+                java_home="/usr/lib/jvm/java-17"
+                hadoop_opts="--add-opens java.base/java.lang=ALL-UNNAMED"
+                if [[ ${HADOOP_VERSION} == 2.* ]]; then
+                  java_home="/usr/lib/jvm/java-8"
+                  hadoop_opts=""
+                fi
+                echo "Attempting to run an instance on top of Hadoop ${HADOOP_VERSION}."
+                # Create working dir
+                rm -rf "${OUTPUT_DIR}/non-shaded" && mkdir "${OUTPUT_DIR}/non-shaded"
+                docker build -t hbase-integration-test -f "${BASEDIR}/dev-support/docker/Dockerfile" .
+                if ! docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+                    -u `id -u`:`id -g` -e JAVA_HOME="${java_home}" \
+                    -e HADOOP_OPTS="${hadoop_opts}" \
+                    --workdir=/hbase hbase-integration-test \
+                    component/dev-support/integration-test/pseudo-distributed-test.sh \
+                    --single-process \
+                    --working-dir ${OUTPUT_DIR}/non-shaded \
+                    --hbase-client-install ${hbase_client_dir} \
+                    ${hbase_install_dir} \
+                    hadoop-install/bin/hadoop \
+                    hadoop-install/share/hadoop/yarn/timelineservice \
+                    hadoop-install/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
+                    hadoop-install/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
+                    hadoop-install/bin/mapred \
+                    >${OUTPUT_DIR}/hadoop.log 2>&1; then
+                  echo "(x) {color:red}-1 client integration test for ${HADOOP_VERSION}{color}\n--Failed when running client tests on top of Hadoop ${HADOOP_VERSION}. [see log for details|${BUILD_URL}/artifact/${OUTPUT_DIR}/hadoop.log]. (note that this means we didn't check the Hadoop ${HADOOP_VERSION} shaded client)" >${OUTPUT_DIR}/commentfile
+                  exit 2
+                fi
+                echo "(/) {color:green}+1 client integration test for ${HADOOP_VERSION} {color}" >${OUTPUT_DIR}/commentfile
+                if [[ ${HADOOP_VERSION} == 2.* ]] || [[ ${HADOOP_VERSION} == 3.2.* ]]; then
+                  echo "skip running shaded hadoop client test for ${HADOOP_VERSION}"
+                  exit 0
+                fi
+                # Create working dir
+                rm -rf "${OUTPUT_DIR}/shaded" && mkdir "${OUTPUT_DIR}/shaded"
+                echo "Attempting to run an instance on top of Hadoop ${HADOOP_VERSION}, relying on the Hadoop client artifacts for the example client program."
+                if ! docker run --rm -v "${WORKSPACE}":/hbase -v /etc/passwd:/etc/passwd:ro -v /etc/group:/etc/group:ro \
+                    -u `id -u`:`id -g` -e JAVA_HOME="${java_home}" \
+                    -e HADOOP_OPTS="${hadoop_opts}" \
+                    --workdir=/hbase hbase-integration-test \
+                    component/dev-support/integration-test/pseudo-distributed-test.sh \
+                    --single-process \
+                    --hadoop-client-classpath hadoop-install/share/hadoop/client/hadoop-client-api-*.jar:hadoop-install/share/hadoop/client/hadoop-client-runtime-*.jar \
+                    --working-dir ${OUTPUT_DIR}/shaded \
+                    --hbase-client-install ${hbase_client_dir} \
+                    ${hbase_install_dir} \
+                    hadoop-install/bin/hadoop \
+                    hadoop-install/share/hadoop/yarn/timelineservice \
+                    hadoop-install/share/hadoop/yarn/test/hadoop-yarn-server-tests-*-tests.jar \
+                    hadoop-install/share/hadoop/mapreduce/hadoop-mapreduce-client-jobclient-*-tests.jar \
+                    hadoop-install/bin/mapred \
+                    >${OUTPUT_DIR}/hadoop-shaded.log 2>&1; then
+                  echo "(x) {color:red}-1 client integration test for ${HADOOP_VERSION}{color}\n--Failed when running client tests on top of Hadoop ${HADOOP_VERSION} using Hadoop's shaded client. [see log for details|${BUILD_URL}/artifact/${OUTPUT_DIR}/hadoop-shaded.log]." >> ${OUTPUT_DIR}/commentfile
+                  exit 2
+                fi
+                echo "(/) {color:green}+1 client integration test for ${HADOOP_VERSION} with shaded hadoop client{color}" >> ${OUTPUT_DIR}/commentfile
+              '''
+            } // steps
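+            // Each matrix cell leaves its verdict in ${OUTPUT_DIR}/commentfile; the stash below
+            // is unstashed again in the pipeline-level post block, where all commentfiles are
+            // joined into a single JIRA comment.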
+            post {
+              always {
+                stash name: "test-result-${env.HADOOP_VERSION}", includes: "${env.OUTPUT_DIR}/commentfile"
+                archiveArtifacts artifacts: "${env.OUTPUT_DIR}/*"
+                archiveArtifacts artifacts: "${env.OUTPUT_DIR}/**/*"
+              } // always
+            } // post
+          } // integration test
+        } // stages
+      } // matrix
+    } // integration test matrix
+  } // stages
+  post {
+    always {
+      script {
+        sh "printenv"
+        // wipe out all the output directories before unstashing
+        sh '''
+          echo "Clean up result directories"
+          rm -rf output-srctarball
+          rm -rf output-integration-hadoop-*
+        '''
+        def results = []
+        results.add('output-srctarball/commentfile')
+        for (hadoopVersion in getHadoopVersions(env.HADOOP_VERSIONS)) {
+          try {
+            unstash "test-result-${hadoopVersion}"
+            results.add("output-integration-hadoop-${hadoopVersion}/commentfile")
+          } catch (e) {
+            echo "unstash ${hadoopVersion} failed, ignore"
+          }
+        }
+        echo env.BRANCH_NAME
+        echo env.BUILD_URL
+        echo currentBuild.result
+        echo currentBuild.durationString
+        def comment = "Results for branch ${env.BRANCH_NAME}\n"
+        comment += "\t[build ${currentBuild.displayName} on builds.a.o|${env.BUILD_URL}]: "
+        if (currentBuild.result == null || currentBuild.result == "SUCCESS") {
+          comment += "(/) *{color:green}+1 overall{color}*\n"
+        } else {
+          comment += "(x) *{color:red}-1 overall{color}*\n"
+          // Ideally get the committer out of the change and @ mention them in the per-jira comment
+        }
+        comment += "----\ndetails (if available):\n\n"
+        echo ""
+        echo "[DEBUG] trying to aggregate step-wise results"
+        comment += results.collect { fileExists(file: it) ? readFile(file: it) : "" }.join("\n\n")
+        echo "[INFO] Comment:"
+        echo comment
+        echo ""
+        echo "[DEBUG] checking to see if feature branch"
+        def jiras = getJirasToComment(env.BRANCH_NAME, [])
+        if (jiras.isEmpty()) {
+          echo "[DEBUG] non-feature branch, checking change messages for jira keys."
+          echo "[INFO] There are ${currentBuild.changeSets.size()} change sets."
+          jiras = getJirasToCommentFromChangesets(currentBuild)
+        }
+        jiras.each { currentIssue ->
+          jiraComment issueKey: currentIssue, body: comment
+        }
+      } // script
+    } // always
+  } // post
+}
+
+@NonCPS
+List getHadoopVersions(String versions) {
+  return versions.split(',').collect { it.trim() }.findAll { it } as String[]
+}
+
+import org.jenkinsci.plugins.workflow.support.steps.build.RunWrapper
+@NonCPS
+List getJirasToCommentFromChangesets(RunWrapper thisBuild) {
+  def seenJiras = []
+  thisBuild.changeSets.each { cs ->
+    cs.getItems().each { change ->
+      CharSequence msg = change.msg
+      echo "change: ${change}"
+      echo "     ${msg}"
+      echo "     ${change.commitId}"
+      echo "     ${change.author}"
+      echo ""
+      seenJiras = getJirasToComment(msg, seenJiras)
+    }
+  }
+  return seenJiras
+}
+
+@NonCPS
+List getJirasToComment(CharSequence source, List seen) {
+  source.eachMatch("HBASE-[0-9]+") { currentIssue ->
+    echo "[DEBUG] found jira key: ${currentIssue}"
+    if (currentIssue in seen) {
+      echo "[DEBUG] already commented on ${currentIssue}."
+    } else {
+      echo "[INFO] commenting on ${currentIssue}."
+ seen << currentIssue + } + } + return seen +} + diff --git a/dev-support/integration-test/patch-hadoop3.sh b/dev-support/integration-test/patch-hadoop3.sh new file mode 100755 index 000000000000..b4c51ca9487d --- /dev/null +++ b/dev-support/integration-test/patch-hadoop3.sh @@ -0,0 +1,24 @@ +#!/usr/bin/env bash +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +## + +hadoop_dir=$1 + +sed -i "s/HADOOP_TOOLS_DIR=\${HADOOP_TOOLS_DIR:-\"share\/hadoop\/tools\"}/HADOOP_TOOLS_DIR=\${HADOOP_TOOLS_DIR:-\"\$HADOOP_TOOLS_HOME\/share\/hadoop\/tools\"}/g" "$hadoop_dir/libexec/hadoop-functions.sh" +sed -i "/HADOOP_CLASSNAME=org.apache.hadoop.mapreduce.MiniHadoopClusterManager/a mockitojar=\$(echo \"\${HADOOP_TOOLS_LIB_JARS_DIR}\"\/mockito-core-[0-9]*.jar)\nhadoop_add_classpath \"\${mockitojar}\"" "$hadoop_dir/bin/mapred" +curl https://repo1.maven.org/maven2/org/mockito/mockito-core/2.28.2/mockito-core-2.28.2.jar -o "$hadoop_dir/share/hadoop/tools/lib/mockito-core-2.28.2.jar" diff --git a/dev-support/integration-test/pseudo-distributed-test.sh b/dev-support/integration-test/pseudo-distributed-test.sh new file mode 100755 index 000000000000..3089b6db3079 --- /dev/null +++ b/dev-support/integration-test/pseudo-distributed-test.sh @@ -0,0 +1,540 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +set -e +function usage { + echo "Usage: ${0} [options] /path/to/component/bin-install /path/to/hadoop/executable /path/to/share/hadoop/yarn/timelineservice /path/to/hadoop/hadoop-yarn-server-tests-tests.jar /path/to/hadoop/hadoop-mapreduce-client-jobclient-tests.jar /path/to/mapred/executable" + echo "" + echo " --zookeeper-data /path/to/use Where the embedded zookeeper instance should write its data." + echo " defaults to 'zk-data' in the working-dir." + echo " --working-dir /path/to/use Path for writing configs and logs. must exist." + echo " defaults to making a directory via mktemp." + echo " --hadoop-client-classpath /path/to/some.jar:/path/to/another.jar classpath for hadoop jars." 
+ echo " defaults to 'hadoop classpath'" + echo " --hbase-client-install /path/to/unpacked/client/tarball if given we'll look here for hbase client jars instead of the bin-install" + echo " --force-data-clean Delete all data in HDFS and ZK prior to starting up hbase" + echo " --single-process Run as single process instead of pseudo-distributed" + echo "" + exit 1 +} +# if no args specified, show usage +if [ $# -lt 5 ]; then + usage +fi + +# Get arguments +declare component_install +declare hadoop_exec +declare working_dir +declare zk_data_dir +declare clean +declare distributed="true" +declare hadoop_jars +declare hbase_client +while [ $# -gt 0 ] +do + case "$1" in + --working-dir) shift; working_dir=$1; shift;; + --force-data-clean) shift; clean="true";; + --zookeeper-data) shift; zk_data_dir=$1; shift;; + --single-process) shift; distributed="false";; + --hadoop-client-classpath) shift; hadoop_jars="$1"; shift;; + --hbase-client-install) shift; hbase_client="$1"; shift;; + --) shift; break;; + -*) usage ;; + *) break;; # terminate while loop + esac +done + +# should still have where component checkout is. +if [ $# -lt 5 ]; then + usage +fi +component_install="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")" +hadoop_exec="$(cd "$(dirname "$2")"; pwd)/$(basename "$2")" +timeline_service_dir="$(cd "$(dirname "$3")"; pwd)/$(basename "$3")" +yarn_server_tests_test_jar="$(cd "$(dirname "$4")"; pwd)/$(basename "$4")" +mapred_jobclient_test_jar="$(cd "$(dirname "$5")"; pwd)/$(basename "$5")" +mapred_exec="$(cd "$(dirname "$6")"; pwd)/$(basename "$6")" + +if [ ! -x "${hadoop_exec}" ]; then + echo "hadoop cli does not appear to be executable." >&2 + exit 1 +fi + +if [ ! -x "${mapred_exec}" ]; then + echo "mapred cli does not appear to be executable." >&2 + exit 1 +fi + +if [ ! -d "${component_install}" ]; then + echo "Path to HBase binary install should be a directory." >&2 + exit 1 +fi + +if [ ! -f "${yarn_server_tests_test_jar}" ]; then + echo "Specified YARN server tests test jar is not a file." >&2 + exit 1 +fi + +if [ ! -f "${mapred_jobclient_test_jar}" ]; then + echo "Specified MapReduce jobclient test jar is not a file." >&2 + exit 1 +fi + +if [ -z "${working_dir}" ]; then + if ! working_dir="$(mktemp -d -t hbase-pseudo-dist-test)" ; then + echo "Failed to create temporary working directory. Please specify via --working-dir" >&2 + exit 1 + fi +else + # absolutes please + working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")" + if [ ! -d "${working_dir}" ]; then + echo "passed working directory '${working_dir}' must already exist." >&2 + exit 1 + fi +fi + +if [ -z "${zk_data_dir}" ]; then + zk_data_dir="${working_dir}/zk-data" + mkdir "${zk_data_dir}" +else + # absolutes please + zk_data_dir="$(cd "$(dirname "${zk_data_dir}")"; pwd)/$(basename "${zk_data_dir}")" + if [ ! -d "${zk_data_dir}" ]; then + echo "passed directory for unpacking the source tarball '${zk_data_dir}' must already exist." + exit 1 + fi +fi + +if [ -z "${hbase_client}" ]; then + hbase_client="${component_install}" +else + echo "Using HBase client-side artifact" + # absolutes please + hbase_client="$(cd "$(dirname "${hbase_client}")"; pwd)/$(basename "${hbase_client}")" + if [ ! -d "${hbase_client}" ]; then + echo "If given hbase client install should be a directory with contents of the client tarball." 
>&2
+    exit 1
+  fi
+fi
+
+if [ -n "${hadoop_jars}" ]; then
+  declare -a tmp_jars
+  for entry in $(echo "${hadoop_jars}" | tr ':' '\n'); do
+    tmp_jars=("${tmp_jars[@]}" "$(cd "$(dirname "${entry}")"; pwd)/$(basename "${entry}")")
+  done
+  hadoop_jars="$(IFS=:; echo "${tmp_jars[*]}")"
+fi
+
+
+echo "You'll find logs and temp files in ${working_dir}"
+
+function redirect_and_run {
+  log_base=$1
+  shift
+  echo "$*" >"${log_base}.err"
+  "$@" >"${log_base}.out" 2>>"${log_base}.err"
+}
+
+(cd "${working_dir}"
+
+echo "Hadoop version information:"
+"${hadoop_exec}" version
+hadoop_version=$("${hadoop_exec}" version | head -n 1)
+hadoop_version="${hadoop_version#Hadoop }"
+if [ "${hadoop_version%.*.*}" -gt 2 ]; then
+  "${hadoop_exec}" envvars
+else
+  echo "JAVA_HOME: ${JAVA_HOME}"
+fi
+
+# Ensure that if some other Hadoop install happens to be present in the environment we ignore it.
+HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP="true"
+export HBASE_DISABLE_HADOOP_CLASSPATH_LOOKUP
+
+if [ -n "${clean}" ]; then
+  echo "Cleaning out ZooKeeper..."
+  rm -rf "${zk_data_dir:?}"/*
+fi
+
+echo "HBase version information:"
+"${component_install}/bin/hbase" version 2>/dev/null
+hbase_version=$("${component_install}/bin/hbase" version 2>&1 | grep ^HBase | head -n 1)
+hbase_version="${hbase_version#HBase }"
+
+if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" ]; then
+  echo "HBase binary install doesn't appear to include a shaded mapreduce artifact." >&2
+  exit 1
+fi
+
+if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-${hbase_version}.jar" ]; then
+  echo "HBase binary install doesn't appear to include a shaded client artifact." >&2
+  exit 1
+fi
+
+if [ ! -s "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar" ]; then
+  echo "HBase binary install doesn't appear to include a shaded byo-hadoop client artifact." >&2
+  exit 1
+fi
+
+echo "Writing out configuration for HBase."
+rm -rf "${working_dir}/hbase-conf"
+mkdir "${working_dir}/hbase-conf"
+
+if [ -f "${component_install}/conf/log4j2.properties" ]; then
+  cp "${component_install}/conf/log4j2.properties" "${working_dir}/hbase-conf/log4j2.properties"
+else
+  cat >"${working_dir}/hbase-conf/log4j2.properties" <<EOF
+status = debug
+dest = err
+name = PropertiesConfig
+
+appender.console.type = Console
+appender.console.target = SYSTEM_ERR
+appender.console.name = Console
+appender.console.layout.type = PatternLayout
+appender.console.layout.pattern = %d{ISO8601} %-5p [%t] %c{2}: %.1000m%n
+
+rootLogger = INFO,Console
+EOF
+fi
+
+cat >"${working_dir}/hbase-conf/hbase-site.xml" <<EOF
+<?xml version="1.0"?>
+<?xml-stylesheet type="text/xsl" href="configuration.xsl"?>
+<configuration>
+  <property>
+    <name>hbase.rootdir</name>
+    <!-- We rely on the defaultFS being set in our hadoop confs -->
+    <value>/hbase</value>
+  </property>
+  <property>
+    <name>hbase.zookeeper.property.dataDir</name>
+    <value>${zk_data_dir}</value>
+  </property>
+  <property>
+    <name>hbase.cluster.distributed</name>
+    <value>${distributed}</value>
+  </property>
+</configuration>
+EOF
+
+if [ "true" = "${distributed}" ]; then
+  cat >"${working_dir}/hbase-conf/regionservers" <<EOF
+localhost
+EOF
+fi
+
+echo "Starting up Hadoop"
+
+if [ "${hadoop_version%.*.*}" -gt 2 ]; then
+  "${mapred_exec}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
+else
+  HADOOP_CLASSPATH="${timeline_service_dir}/*:${timeline_service_dir}/lib/*:${yarn_server_tests_test_jar}" "${hadoop_exec}" jar "${mapred_jobclient_test_jar}" minicluster -format -writeConfig "${working_dir}/hbase-conf/core-site.xml" -writeDetails "${working_dir}/hadoop_cluster_info.json" >"${working_dir}/hadoop_cluster_command.out" 2>"${working_dir}/hadoop_cluster_command.err" &
+fi
+
+echo "$!" > "${working_dir}/hadoop.pid"
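+# The minicluster only writes hbase-conf/core-site.xml (via -writeConfig) once it is up, so a
+# non-empty file doubles as the readiness signal for the wait loop below.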
+# 2 + 4 + 8 + .. + 256 ~= 8.5 minutes.
+max_sleep_time=512
+sleep_time=2
+until [[ -s "${working_dir}/hbase-conf/core-site.xml" || "${sleep_time}" -ge "${max_sleep_time}" ]]; do
+  printf '\twaiting for Hadoop to finish starting up.\n'
+  sleep "${sleep_time}"
+  sleep_time="$((sleep_time*2))"
+done
+
+if [ "${sleep_time}" -ge "${max_sleep_time}" ] ; then
+  echo "time out waiting for Hadoop to startup" >&2
+  exit 1
+fi
+
+if [ "${hadoop_version%.*.*}" -gt 2 ]; then
+  echo "Verifying configs"
+  hadoop_conf_files=""
+  for f in "${working_dir}"/hbase-conf/*-site.xml; do
+    hadoop_conf_files="$hadoop_conf_files -conffile $f"
+  done
+  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" conftest $hadoop_conf_files
+fi
+
+if [ -n "${clean}" ]; then
+  echo "Cleaning out HDFS..."
+  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r /hbase
+  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example/
+  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -rm -r example-region-listing.data
+fi
+
+echo "Listing HDFS contents"
+redirect_and_run "${working_dir}/hadoop_cluster_smoke" \
+  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -ls -R /
+
+echo "Starting up HBase"
+HBASE_CONF_DIR="${working_dir}/hbase-conf/" HBASE_LOG_DIR="${working_dir}" "${component_install}/bin/start-hbase.sh"
+
+sleep_time=2
+until "${component_install}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/waiting_hbase_startup.log" 2>&1 <<EOF
+  count 'hbase:meta'
+EOF
+do
+  printf '\twaiting for HBase to finish starting up.\n'
+  sleep "${sleep_time}"
+  sleep_time="$((sleep_time*2))"
+done
+
+echo "Setting up table 'test:example' with 1,000 regions"
+"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/table_create.log" 2>&1 <<EOF
+  create_namespace 'test'
+  create 'test:example', 'family1', 'family2', {NUMREGIONS => 1000, SPLITALGO => 'UniformSplit'}
+EOF
+
+echo "writing out example TSV to example.tsv"
+cat >"${working_dir}/example.tsv" <<EOF
+row1	value8	value8
+row3	value2
+row2	value9
+row10	value1
+pow1	value8	value8
+pow3	value2
+pow2	value9
+pow10	value1
+paw1	value8	value8
+paw3	value2
+paw2	value9
+paw10	value1
+raw1	value8	value8
+raw3	value2
+raw2	value9
+raw10	value1
+aow1	value8	value8
+aow3	value2
+aow2	value9
+aow10	value1
+aaw1	value8	value8
+aaw3	value2
+aaw2	value9
+aaw10	value1
+how1	value8	value8
+how3	value2
+how2	value9
+how10	value1
+zow1	value8	value8
+zow3	value2
+zow2	value9
+zow10	value1
+zaw1	value8	value8
+zaw3	value2
+zaw2	value9
+zaw10	value1
+haw1	value8	value8
+haw3	value2
+haw2	value9
+haw10	value1
+paw4	value8	value8
+paw5	value2
+paw6	value9
+paw7	value1
+raw8	value8	value8
+raw9	value2
+raw10	value9
+raw11	value1
+EOF
+
+echo "uploading example.tsv to HDFS"
+"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -mkdir example
+"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyFromLocal "${working_dir}/example.tsv" "example/"
+
+echo "Importing TSV via shaded client artifact for HBase - MapReduce integration."
+hbase_dep_classpath=$(HADOOP_CLASSPATH="${hadoop_jars}" "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" mapredcp)
+HADOOP_CLASSPATH="${hbase_dep_classpath}" redirect_and_run "${working_dir}/mr-importtsv" \
+  "${hadoop_exec}" --config "${working_dir}/hbase-conf/" jar "${hbase_client}/lib/shaded-clients/hbase-shaded-mapreduce-${hbase_version}.jar" importtsv -Dimporttsv.columns=HBASE_ROW_KEY,family1:column1,family1:column4,family1:column6 test:example example/
+"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_import.out" 2>"${working_dir}/scan_import.err" <<EOF
+  scan 'test:example'
+EOF
+
+echo "Verifying row count from import."
+import_rowcount=$(echo "count 'test:example'" | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive 2>/dev/null | grep "row(s)" | awk '{print $1}')
+if [ ! "${import_rowcount}" -eq 48 ]; then
+  echo "ERROR: Instead of finding 48 rows, we found ${import_rowcount}."
+  exit 2
+fi
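+# A quick manual spot check of the same invariant (assuming the cluster from this run is still
+# up and working_dir unchanged) would be:
+#   echo "count 'test:example'" | "${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive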
+cat >"${working_dir}/HBaseClientReadWriteExample.java" < regions = new LinkedList<>(); + try (Admin admin = connection.getAdmin()) { + final ClusterMetrics cluster = admin.getClusterMetrics(); + System.out.println(String.format("\tCluster reports version %s, ave load %f, region count %d", cluster.getHBaseVersion(), cluster.getAverageLoad(), cluster.getRegionCount())); + for (ServerMetrics server : cluster.getLiveServerMetrics().values()) { + for (RegionMetrics region : server.getRegionMetrics().values()) { + regions.add(region.getNameAsString()); + } + } + } + final Path listing = new Path("example-region-listing.data"); + System.out.println("Writing list to HDFS"); + try (FileSystem fs = FileSystem.newInstance(hadoop)) { + final Path path = fs.makeQualified(listing); + try (FSDataOutputStream out = fs.create(path)) { + out.writeInt(regions.size()); + for (String region : regions) { + out.writeUTF(region); + } + out.hsync(); + } + } + final List puts = new LinkedList<>(); + final Put marker = new Put(new byte[] { (byte)0 }); + System.out.println("Reading list from HDFS"); + try (FileSystem fs = FileSystem.newInstance(hadoop)) { + final Path path = fs.makeQualified(listing); + final CellBuilder builder = CellBuilderFactory.create(CellBuilderType.SHALLOW_COPY); + try (FSDataInputStream in = fs.open(path)) { + final int count = in.readInt(); + marker.addColumn(FAMILY_BYTES, Bytes.toBytes("count"), Bytes.toBytes(count)); + for(int i = 0; i < count; i++) { + builder.clear(); + final byte[] row = Bytes.toBytes(in.readUTF()); + final Put put = new Put(row); + builder.setRow(row); + builder.setFamily(FAMILY_BYTES); + builder.setType(Cell.Type.Put); + put.add(builder.build()); + puts.add(put); + } + } + } + System.out.println("Writing list into HBase table"); + try (Table table = connection.getTable(TableName.valueOf("test:example"))) { + table.put(marker); + table.put(puts); + } + } + } +} +EOF +redirect_and_run "${working_dir}/hbase-shaded-client-compile" \ + $JAVA_HOME/bin/javac -cp "${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hadoop_jars}" "${working_dir}/HBaseClientReadWriteExample.java" +echo "Running shaded client example. It'll fetch the set of regions, round-trip them to a file in HDFS, then write them one-per-row into the test table." +# The order of classpath entries here is important. if we're using non-shaded Hadoop 3 / 2.9.0 jars, we have to work around YARN-2190. +redirect_and_run "${working_dir}/hbase-shaded-client-example" \ + $JAVA_HOME/bin/java -cp "${working_dir}/hbase-conf/:${hbase_client}/lib/shaded-clients/hbase-shaded-client-byo-hadoop-${hbase_version}.jar:${hbase_dep_classpath}:${working_dir}:${hadoop_jars}" HBaseClientReadWriteExample + +echo "Checking on results of example program." +"${hadoop_exec}" --config "${working_dir}/hbase-conf/" fs -copyToLocal "example-region-listing.data" "${working_dir}/example-region-listing.data" + +"${hbase_client}/bin/hbase" --config "${working_dir}/hbase-conf/" shell --noninteractive >"${working_dir}/scan_example.out" 2>"${working_dir}/scan_example.err" </dev/null | grep "row(s)" | awk '{print $1}') +if [ "${example_rowcount}" -gt "1049" ]; then + echo "Found ${example_rowcount} rows, which is enough to cover 48 for import, 1000 example's use of user table regions, 1 for example's use of meta region, and 1 for example's count record" +else + echo "ERROR: Only found ${example_rowcount} rows." 
+fi
+
+)
diff --git a/dev-support/integration-test/source-artifact.sh b/dev-support/integration-test/source-artifact.sh
new file mode 100755
index 000000000000..7292d2da8c37
--- /dev/null
+++ b/dev-support/integration-test/source-artifact.sh
@@ -0,0 +1,242 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+function usage {
+  echo "Usage: ${0} [options] /path/to/component/checkout"
+  echo ""
+  echo "  --intermediate-file-dir /path/to/use   Path for writing listings and diffs. must exist."
+  echo "                                         defaults to making a directory via mktemp."
+  echo "  --unpack-temp-dir /path/to/use         Path for unpacking tarball. defaults to"
+  echo "                                         'unpacked_src_tarball' in intermediate directory."
+  echo "  --maven-m2-initial /path/to/use        Path for maven artifacts while building in"
+  echo "                                         component-dir."
+  echo "  --maven-m2-src-build /path/to/use      Path for maven artifacts while building from the"
+  echo "                                         unpacked source tarball."
+  echo "  --clean-source-checkout                Destructively clean component checkout before"
+  echo "                                         comparing to source tarball. N.B. will delete"
+  echo "                                         anything in the checkout dir that isn't from"
+  echo "                                         a git checkout, including ignored files."
+  exit 1
+}
+
+set -e
+
+MVN="mvn"
+if ! command -v mvn &>/dev/null; then
+  MVN=$MAVEN_HOME/bin/mvn
+fi
+# if no args specified, show usage
+if [ $# -lt 1 ]; then
+  usage
+fi
+
+# Get arguments
+declare component_dir
+declare unpack_dir
+declare m2_initial
+declare m2_tarbuild
+declare working_dir
+declare source_clean
+while [ $# -gt 0 ]
+do
+  case "$1" in
+    --unpack-temp-dir) shift; unpack_dir=$1; shift;;
+    --maven-m2-initial) shift; m2_initial=$1; shift;;
+    --maven-m2-src-build) shift; m2_tarbuild=$1; shift;;
+    --intermediate-file-dir) shift; working_dir=$1; shift;;
+    --clean-source-checkout) shift; source_clean="true";;
+    --) shift; break;;
+    -*) usage ;;
+    *) break;;  # terminate while loop
+  esac
+done
+
+# should still have where component checkout is.
+if [ $# -lt 1 ]; then
+  usage
+fi
+component_dir="$(cd "$(dirname "$1")"; pwd)/$(basename "$1")"
+
+if [ -z "${working_dir}" ]; then
+  if ! working_dir="$(mktemp -d -t hbase-srctarball-test)" ; then
+    echo "Failed to create temporary working directory. Please specify via --intermediate-file-dir"
+    exit 1
+  fi
+else
+  # absolutes please
+  working_dir="$(cd "$(dirname "${working_dir}")"; pwd)/$(basename "${working_dir}")"
+  if [ ! -d "${working_dir}" ]; then
+    echo "passed working directory '${working_dir}' must already exist."
+    exit 1
+  fi
+fi
+
+echo "You'll find logs and temp files in ${working_dir}"
-d "${unpack_dir}" ]; then + echo "passed directory for unpacking the source tarball '${unpack_dir}' must already exist." + exit 1 + fi + rm -rf "${unpack_dir:?}/*" +fi + +if [ -z "${m2_initial}" ]; then + m2_initial="${working_dir}/.m2-initial" + mkdir "${m2_initial}" +else + # absolutes please + m2_initial="$(cd "$(dirname "${m2_initial}")"; pwd)/$(basename "${m2_initial}")" + if [ ! -d "${m2_initial}" ]; then + echo "passed directory for storing the initial build's maven repo '${m2_initial}' " \ + "must already exist." + exit 1 + fi +fi + +if [ -z "${m2_tarbuild}" ]; then + m2_tarbuild="${working_dir}/.m2-tarbuild" + mkdir "${m2_tarbuild}" +else + # absolutes please + m2_tarbuild="$(cd "$(dirname "${m2_tarbuild}")"; pwd)/$(basename "${m2_tarbuild}")" + if [ ! -d "${m2_tarbuild}" ]; then + echo "passed directory for storing the build from src tarball's maven repo '${m2_tarbuild}' " \ + "must already exist." + exit 1 + fi +fi + +# This is meant to mimic what a release manager will do to create RCs. +# See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate + +echo "Maven details, in case our JDK doesn't match expectations:" +${MVN} --version --offline | tee "${working_dir}/maven_version" + +echo "Do a clean building of the source artifact using code in ${component_dir}" +cd "${component_dir}" +if [ -n "${source_clean}" ]; then + echo "Clean..." + git clean -xdfff >"${working_dir}/component_git_clean.log" 2>&1 +fi +echo "Follow the ref guide section on making a RC: Step 6 Build the source tarball" +git archive --format=tar.gz --output="${working_dir}/hbase-src.tar.gz" \ + --prefix="hbase-SOMEVERSION/" HEAD \ + >"${working_dir}/component_build_src_tarball.log" 2>&1 + +cd "${unpack_dir}" +echo "Unpack the source tarball" +tar --strip-components=1 -xzf "${working_dir}/hbase-src.tar.gz" \ + >"${working_dir}/srctarball_unpack.log" 2>&1 + +cd "${component_dir}" +echo "Diff against source tree" +diff --binary --recursive . "${unpack_dir}" >"${working_dir}/diff_output" || true + +cd "${working_dir}" +# expectation check largely based on HBASE-14952 +echo "Checking against things we don't expect to include in the source tarball (git related, etc.)" +# Add in lines to show differences between the source tarball and this branch, in the same format diff would give. +# e.g. prior to HBASE-19152 we'd have the following lines (ignoring the bash comment marker): +#Only in .: .gitattributes +#Only in .: .gitignore +cat >known_excluded <"${working_dir}/unexpected.diff" ; then + echo "Any output here are unexpected differences between the source artifact we'd make for an RC and the current branch." + echo "One potential source of differences is if you have an unclean working directory; you should expect to see" + echo "such extraneous files below." + echo "" + echo "The expected differences are on the < side and the current differences are on the > side." + echo "In a given set of differences, '.' refers to the branch in the repo and 'unpacked_src_tarball' refers to what we pulled out of the tarball." + diff known_excluded diff_output +else + echo "Everything looks as expected." 
+fi
+
+function get_hadoop3_version {
+  local version="$1"
+  if [[ "${version}" =~ -SNAPSHOT$ ]]; then
+    echo "${version/-SNAPSHOT/-hadoop3-SNAPSHOT}"
+  else
+    echo "${version}-hadoop3"
+  fi
+}
+
+function build_tarball {
+  local build_hadoop3=$1
+  local mvn_extra_args=""
+  local build_log="srctarball_install.log"
+  local tarball_glob="hbase-*-bin.tar.gz"
+  if [ $build_hadoop3 -ne 0 ]; then
+    local version=$(${MVN} -Dmaven.repo.local="${m2_tarbuild}" help:evaluate -Dexpression=project.version -q -DforceStdout)
+    local hadoop3_version=$(get_hadoop3_version $version)
+    mvn_extra_args="-Drevision=${hadoop3_version} -Dhadoop.profile=3.0"
+    build_log="hadoop3_srctarball_install.log"
+    tarball_glob="hbase-*-hadoop3-*-bin.tar.gz"
+    echo "Follow the ref guide section on making a RC: Step 8 Build the hadoop3 binary tarball."
+  else
+    echo "Follow the ref guide section on making a RC: Step 7 Build the binary tarball."
+  fi
+  if ${MVN} --threads=2 -DskipTests -Prelease --batch-mode -Dmaven.repo.local="${m2_tarbuild}" ${mvn_extra_args} clean install \
+      assembly:single >"${working_dir}/${build_log}" 2>&1; then
+    for artifact in "${unpack_dir}"/hbase-assembly/target/${tarball_glob}; do
+      if [ -f "${artifact}" ]; then
+        # TODO check the layout of the binary artifact we just made.
+        echo "Building a binary tarball from the source tarball succeeded."
+        return 0
+      fi
+    done
+  fi
+
+  echo "Building a binary tarball from the source tarball failed. see ${working_dir}/${build_log} for details."
+  # Copy up the rat.txt to the working dir so it is available in the build archive in case rat complains.
+  # rat.txt can be under any module target dir... copy them all up renaming them to include parent dir as we go.
+  find ${unpack_dir} -name rat.txt -type f | while IFS= read -r NAME; do cp -v "$NAME" "${working_dir}/${NAME//\//_}"; done
+  return 1
+}
+
+cd "${unpack_dir}"
+
+if ${MVN} -Dmaven.repo.local="${m2_tarbuild}" help:active-profiles | grep -q hadoop-3.0; then
+  echo "The hadoop-3.0 profile is activated by default, build a default tarball."
+  build_tarball 0
+else
+  echo "The hadoop-3.0 profile is not activated by default, build a default tarball first."
+  # use java 8 to build with hadoop2; under 'set -e' the result has to be checked in the 'if' itself
+  if ! JAVA_HOME="/usr/lib/jvm/java-8" build_tarball 0; then
+    exit 1
+  fi
+
+  # move the previous tarballs out, so it will not be cleaned while building against hadoop3
+  mv "${unpack_dir}"/hbase-assembly/target/hbase-*-bin.tar.gz "${unpack_dir}"/
+  echo "build a hadoop3 tarball."
+  if ! build_tarball 1; then
+    exit 1
+  fi
+  # move tarballs back
+  mv "${unpack_dir}"/hbase-*-bin.tar.gz "${unpack_dir}"/hbase-assembly/target/
+fi
diff --git a/dev-support/jenkins_precommit_github_yetus.sh b/dev-support/jenkins_precommit_github_yetus.sh
index 8604d96760dc..4ec0c1d3829e 100755
--- a/dev-support/jenkins_precommit_github_yetus.sh
+++ b/dev-support/jenkins_precommit_github_yetus.sh
@@ -31,7 +31,6 @@ declare -i missing_env=0
 declare -a required_envs=(
   # these ENV variables define the required API with Jenkinsfile_GitHub
   "ARCHIVE_PATTERN_LIST"
-  "BUILD_URL_ARTIFACTS"
   "DOCKERFILE"
   "GITHUB_PASSWORD"
   "GITHUB_USER"
@@ -39,7 +38,6 @@ declare -a required_envs=(
   "PLUGINS"
   "SET_JAVA_HOME"
   "SOURCEDIR"
-  "TESTS_FILTER"
   "YETUSDIR"
   "AUTHOR_IGNORE_LIST"
   "BLANKS_EOL_IGNORE_FILE"
@@ -53,6 +51,12 @@ for required_env in "${required_envs[@]}"; do
   fi
 done
 
+# BUILD_URL_ARTIFACTS is required for Jenkins but set in personality for GitHub Actions
+if [[ "${GITHUB_ACTIONS}" != "true" ]] && [[ -z "${BUILD_URL_ARTIFACTS}" ]]; then
+  echo "[ERROR] Required environment variable 'BUILD_URL_ARTIFACTS' is not set."
+  missing_env=${missing_env}+1
+fi
+
 if [ ${missing_env} -gt 0 ]; then
   echo "[ERROR] Please set the required environment variables before invoking. If this error is " \
     "on Jenkins, then please file a JIRA about the error."
@@ -91,7 +95,11 @@ YETUS_ARGS+=("--console-report-file=${PATCHDIR}/console.txt")
 YETUS_ARGS+=("--html-report-file=${PATCHDIR}/report.html")
 # enable writing back to Github
 YETUS_ARGS+=("--github-token=${GITHUB_PASSWORD}")
-YETUS_ARGS+=("--github-write-comment")
+# GitHub Actions fork PRs cannot write comments (GITHUB_TOKEN has no PR write permission)
+# Jenkins can write comments via its own credentials
+if [[ "${GITHUB_ACTIONS}" != "true" ]]; then
+  YETUS_ARGS+=("--github-write-comment")
+fi
 # auto-kill any surefire stragglers during unit test runs
 YETUS_ARGS+=("--reapermode=kill")
 # set relatively high limits for ASF machines
@@ -103,7 +111,9 @@ YETUS_ARGS+=("--spotbugs-strict-precheck")
 # rsync these files back into the archive dir
 YETUS_ARGS+=("--archive-list=${ARCHIVE_PATTERN_LIST}")
 # URL for user-side presentation in reports and such to our artifacts
-YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}")
+if [[ -n "${BUILD_URL_ARTIFACTS}" ]]; then
+  YETUS_ARGS+=("--build-url-artifacts=${BUILD_URL_ARTIFACTS}")
+fi
 # plugins to enable
 YETUS_ARGS+=("--plugins=${PLUGINS},-findbugs")
 # run in docker mode and specifically point to our
@@ -115,15 +125,17 @@ YETUS_ARGS+=("--java-home=${SET_JAVA_HOME}")
 YETUS_ARGS+=("--author-ignore-list=${AUTHOR_IGNORE_LIST}")
 YETUS_ARGS+=("--blanks-eol-ignore-file=${BLANKS_EOL_IGNORE_FILE}")
 YETUS_ARGS+=("--blanks-tabs-ignore-file=${BLANKS_TABS_IGNORE_FILE}*")
-YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}")
+if [[ -n "${TESTS_FILTER}" ]]; then
+  YETUS_ARGS+=("--tests-filter=${TESTS_FILTER}")
+fi
 YETUS_ARGS+=("--personality=${SOURCEDIR}/dev-support/hbase-personality.sh")
 YETUS_ARGS+=("--quick-hadoopcheck")
 if [[ "${SKIP_ERRORPRONE}" = "true" ]]; then
   # skip error prone
   YETUS_ARGS+=("--skip-errorprone")
 fi
-# effectively treat dev-support as a custom maven module
-YETUS_ARGS+=("--skip-dirs=dev-support")
+# Exclude non-code directories from module detection to avoid triggering full builds
+YETUS_ARGS+=("--skip-dirs=dev-support,.github,bin,conf")
 # For testing with specific hadoop version. Activates corresponding profile in maven runs.
 if [[ -n "${HADOOP_PROFILE}" ]]; then
   # Master has only Hadoop3 support. We don't need to activate any profile.
@@ -156,6 +168,10 @@ fi if [[ -n "${JAVA8_HOME}" ]]; then YETUS_ARGS+=("--java8-home=${JAVA8_HOME}") fi +# Test profile for running specific test categories (e.g., runDevTests, runLargeTests-wave1) +if [[ -n "${TEST_PROFILE}" ]]; then + YETUS_ARGS+=("--test-profile=${TEST_PROFILE}") +fi echo "Launching yetus with command line:" echo "${TESTPATCHBIN} ${YETUS_ARGS[*]}" diff --git a/dev-support/make_rc.sh b/dev-support/make_rc.sh index 1d65f1807c7a..bd65ceb87296 100755 --- a/dev-support/make_rc.sh +++ b/dev-support/make_rc.sh @@ -17,4 +17,4 @@ # limitations under the License. echo "Replaced by ./dev-support/create-release/do-release-docker.sh script." -echo "See http://hbase.apache.org/book.html#do-release-docker.sh" +echo "See https://hbase.apache.org/docs/building-and-developing/releasing#making-a-release-candidate" diff --git a/dev-support/spotbugs-exclude.xml b/dev-support/spotbugs-exclude.xml index 2f0684eff4d7..17b8d2cbdedd 100644 --- a/dev-support/spotbugs-exclude.xml +++ b/dev-support/spotbugs-exclude.xml @@ -271,4 +271,16 @@ + + + + + + + + + diff --git a/dev-support/yetus_console_to_md.py b/dev-support/yetus_console_to_md.py new file mode 100644 index 000000000000..bee5512eec71 --- /dev/null +++ b/dev-support/yetus_console_to_md.py @@ -0,0 +1,522 @@ +#!/usr/bin/env python3 +## +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Convert Apache Yetus console output to Markdown format. +""" +import os +import re +import sys +from io import TextIOWrapper +from pathlib import Path +from typing import Dict, List, Optional, Tuple + +# Vote to emoji mapping +VOTE_EMOJI = { + '+1': '✅', + '-1': '❌', + '0': '🆗', + '+0': '🆗', + '-0': '⚠️' +} + + +def convert_vote(vote: str) -> str: + """Convert vote string to emoji.""" + return VOTE_EMOJI.get(vote, vote) + + +def is_runtime(text: str) -> bool: + """Check if text is a runtime like '41m 24s'.""" + return bool(re.match(r'^\d+m\s+\d+s$', text)) + + +def parse_table_row(line: str) -> Tuple[str, str, str, str]: + """ + Parse a table row and return tuple of cell values. 
+    Returns exactly 4 columns: (vote, subsystem, runtime, comment)
+    """
+    parts = line.split('|')
+    # Remove first empty element (from leading |)
+    parts = parts[1:] if len(parts) > 1 else []
+
+    # Take first 4 columns and strip whitespace
+    result: List[str] = [p.strip() for p in parts[:4]]
+
+    # Pad to 4 columns if needed
+    while len(result) < 4:
+        result.append('')
+
+    return result[0], result[1], result[2], result[3]
+
+
+def is_results_section_start(line: str) -> bool:
+    """Check if line indicates the start of Results section."""
+    return bool(re.search(r'^\[\w+] Results:', line.strip()))
+
+
+def is_tests_run_summary(line: str) -> bool:
+    """Check if line is the Tests run summary line."""
+    return bool(re.search(r'^\[\w+] Tests run:', line.strip()))
+
+
+def parse_results_section(
+    f: TextIOWrapper,
+    failures: List[str],
+    flakes: List[str],
+    errors: List[str]
+) -> None:
+    """
+    Parse the Results section within a patch-unit file.
+    """
+    current_error_type = None
+    while line := f.readline():
+        stripped = line.strip()
+
+        # Section end markers
+        if is_tests_run_summary(line):
+            return
+
+        # Detect error type sections
+        if re.search(r'^\[\w+] Failures:', stripped):
+            current_error_type = failures
+        elif re.search(r'^\[\w+] Flakes:', stripped):
+            current_error_type = flakes
+        elif re.search(r'^\[\w+] Errors:', stripped):
+            current_error_type = errors
+        else:
+            # Parse test entries
+            if current_error_type is not None:
+                test_match = re.search(
+                    r'^\[\w+]\s+((?:org\.)?\S+\.(?:\w+\.)*\w+\.\w+)',
+                    stripped
+                )
+                if test_match:
+                    test_name = test_match.group(1)
+                    if 'test' in test_name.lower():
+                        current_error_type.append(test_name)
+
+
+def skip_to_results_section(f: TextIOWrapper) -> bool:
+    """
+    Skip the io stream to the Results section.
+    After calling this method, the TextIOWrapper is positioned at the line after "Results:".
+
+    Returns:
+        True if we find a results section, False if we have reached the EOF
+    """
+    while line := f.readline():
+        if is_results_section_start(line):
+            return True
+    return False
+
+
+def scan_all_tests(dir: Path) -> Dict[str, str]:
+    """
+    Scan the archiver dir to find all the tests and their module
+
+    Returns:
+        Dict mapping test name to module name
+    """
+    module = None
+    module_to_test_name = {}
+    for dirpath, _, filenames in os.walk(dir):
+        if len(filenames) > 0:
+            # <archiver dir>/<module>/target/surefire-reports
+            module = dirpath.split(os.sep)[-3]
+            for filename in filenames:
+                match = re.match(r'(org\.apache\.[^-]+)\.txt', filename)
+                if match:
+                    module_to_test_name[match.group(1)] = module
+    return module_to_test_name
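+# For a layout like archiver/hbase-server/target/surefire-reports/org.apache.SomeTest.txt the
+# map produced above would contain, e.g. (hypothetical names):
+#   {'org.apache.SomeTest': 'hbase-server'}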
+ """ + with open(file_path, 'r') as f: + while skip_to_results_section(f): + parse_results_section(f, failures, flakes, errors) + + +def get_module(test_name: str, test_name_to_module: Dict[str, str]) -> str: + rindex_of_bracket = test_name.rfind('[') + if rindex_of_bracket > 0: + # parameterized test, remove the tailing parameters + test_name = test_name[:rindex_of_bracket] + + module = test_name_to_module.get(test_name) + if module: + return module + + # usually the failed test name has the method name suffix, but the test_name_to_module only + # contains class name, so let's try to remove the last part and try again + rindex_of_dot = test_name.rfind('.') + if rindex_of_dot > 0: + test_name = test_name[:rindex_of_dot] + + module = test_name_to_module.get(test_name) + if module: + return module + return 'default' + + +def increase(module_to_count: Dict[str, int], module: str) -> None: + if module in module_to_count: + module_to_count[module] += 1 + else: + module_to_count[module] = 1 + + +def add_to_details(test_name: str, module: str, error_type: str, + details: Dict[str, Dict[str, List[str]]]) -> None: + if module not in details: + error_type_to_tests = {} + details[module] = error_type_to_tests + else: + error_type_to_tests = details[module] + + if error_type in error_type_to_tests: + error_type_to_tests[error_type].append(test_name) + else: + error_type_to_tests[error_type] = [test_name] + + +def process_failed_tests( + error_type: str, + failed_tests: List[str], + module_to_test_name: Dict[str, str], + counts: Dict[str, Dict[str, int]], + details: Dict[str, Dict[str, List[str]]] +) -> None: + for test_name in failed_tests: + module = get_module(test_name, module_to_test_name) + increase(counts[error_type], module) + add_to_details(test_name, module, error_type, details) + + +def aggregate_failed_tests(yetus_dir: Path) -> Tuple[ + Dict[str, Dict[str, int]], Dict[str, Dict[str, List[str]]]]: + """ + Aggregate failed tests from all patch-unit-*.txt files. 
+def aggregate_failed_tests(yetus_dir: Path) -> Tuple[
+        Dict[str, Dict[str, int]], Dict[str, Dict[str, List[str]]]]:
+    """
+    Aggregate failed tests from all patch-unit-*.txt files.
+
+    Returns:
+        Tuple of:
+        - counts: {error_type: {module: count}}
+        - details: {module: {error_type: [test_names]}}
+    """
+    patch_files = list(yetus_dir.glob('patch-unit-*.txt'))
+
+    if not patch_files:
+        return {}, {}
+
+    # Aggregate results from all files
+    failures = []
+    flakes = []
+    errors = []
+
+    for patch_file in patch_files:
+        parse_patch_unit_file(patch_file, failures, flakes, errors)
+
+    if not failures and not flakes and not errors:
+        return {}, {}
+
+    counts = {'Failures': {}, 'Flakes': {}, 'Errors': {}}
+    details = {}
+    module_to_test_name = scan_all_tests(yetus_dir / 'archiver')
+    process_failed_tests('Failures', failures, module_to_test_name, counts, details)
+    process_failed_tests('Flakes', flakes, module_to_test_name, counts, details)
+    process_failed_tests('Errors', errors, module_to_test_name, counts, details)
+
+    return dict(counts), dict(details)
+
+
+def generate_failed_tests_table(
+    counts: Dict[str, Dict[str, int]],
+    details: Dict[str, Dict[str, List[str]]]
+) -> List[str]:
+    """Generate the Failed Tests HTML table."""
+    total_failures = sum(sum(m.values()) for m in counts.values())
+    if total_failures == 0:
+        return []
+
+    content = [
+        '\n## Failed Tests\n\n',
+        '<table>\n',
+        '<tr><th>Error Type</th><th>Count</th><th>Module</th><th>Tests</th></tr>\n'
+    ]
+
+    error_types = ['Failures', 'Flakes', 'Errors']
+
+    for error_type in error_types:
+        if error_type not in counts:
+            continue
+
+        modules = counts[error_type]
+        total_count = sum(modules.values())
+        num_modules = len(modules)
+
+        first_row = True
+        for module in sorted(modules.keys()):
+            tests = details.get(module, {}).get(error_type, [])
+            tests_str = '<br/>'.join(sorted(set(tests))) if tests else ''
+
+            if first_row:
+                content.append(
+                    f'<tr><td rowspan="{num_modules}">{error_type}</td>'
+                    f'<td rowspan="{num_modules}">{total_count}</td>'
+                    f'<td>{module}</td><td>{tests_str}</td></tr>\n'
+                )
+                first_row = False
+            else:
+                content.append(f'<tr><td>{module}</td><td>{tests_str}</td></tr>\n')
+
+    content.extend(['\n', '</table>\n'])
+
+    return content
+
+
+def collect_continuation_lines(
+    lines: List[str],
+    start_idx: int
+) -> Tuple[List[str], int]:
+    """
+    Collect continuation lines for a table row.
+
+    Args:
+        lines: All lines from the file
+        start_idx: Index to start checking from
+
+    Returns:
+        Tuple of (list of comment parts, next index to process)
+    """
+    comment_parts = []
+    i = start_idx
+
+    while i < len(lines):
+        line = lines[i]
+        stripped = line.strip()
+
+        if not stripped.startswith('|'):
+            break
+
+        if '|| Subsystem || Report/Notes ||' in line:
+            break
+
+        vote, _, runtime, comment = parse_table_row(line)
+
+        # Stop at new data row
+        if vote in VOTE_EMOJI:
+            break
+
+        # Empty vote/subsystem means continuation or separator
+        if not vote:
+            if comment:
+                comment_parts.append(comment)
+                i += 1
+            elif runtime and is_runtime(runtime):
+                break
+            else:
+                i += 1
+        else:
+            break
+
+    return comment_parts, i
+
+
+def process_first_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]:
+    """
+    Process the first table (Vote, Subsystem, Runtime, Comment).
+
+    Returns:
+        Tuple of (Markdown lines, next index to process)
+    """
+    content = [
+        '\n',
+        '| Vote | Subsystem | Runtime | Comment |\n',
+        '|------|-----------|---------|---------|\n'
+    ]
+
+    i = start_idx
+
+    # Skip the original separator line
+    if i < len(lines) and '===' in lines[i]:
+        i += 1
+
+    while i < len(lines):
+        line = lines[i]
+        stripped = line.strip()
+
+        if '|| Subsystem || Report/Notes ||' in line:
+            break
+
+        if stripped.startswith('+--'):
+            i += 1
+            continue
+
+        if not stripped.startswith('|'):
+            i += 1
+            continue
+
+        vote, subsystem, runtime, comment = parse_table_row(line)
+
+        # Section header (vote and subsystem are empty)
+        if not vote and not subsystem:
+            if comment:
+                content.append(f'| | | | {comment} |\n')
+            elif runtime and is_runtime(runtime):
+                content.append(f'| | | {runtime} | |\n')
+            i += 1
+            continue
+
+        # Data row with vote
+        if vote in VOTE_EMOJI:
+            vote_emoji = convert_vote(vote)
+            comment_parts = [comment] if comment else []
+
+            continuation_parts, i = collect_continuation_lines(lines, i + 1)
+            comment_parts.extend(continuation_parts)
+
+            comment_text = ' '.join(comment_parts)
+            content.append(f'| {vote_emoji} | {subsystem} | {runtime} | {comment_text} |\n')
+            continue
+
+        # Other cases, skip
+        i += 1
+
+    return content, i
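+# process_first_table above emits GitHub-flavored Markdown rows such as (illustrative only):
+#   | ✅ | compile | 0m 45s | the patch passed |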
+def process_second_table(lines: List[str], start_idx: int) -> Tuple[List[str], int]:
+    """
+    Process the second table (Subsystem, Report/Notes).
+
+    Returns:
+        Tuple of (Markdown lines, next index to process)
+    """
+    content = [
+        '\n## Subsystem Reports\n\n',
+        '| Subsystem | Report/Notes |\n',
+        '|-----------|------------|\n'
+    ]
+
+    i = start_idx
+
+    # Skip the original separator line
+    if i < len(lines) and '===' in lines[i]:
+        i += 1
+
+    while i < len(lines):
+        line = lines[i]
+        stripped = line.strip()
+
+        if not stripped.startswith('|'):
+            break
+
+        # Split by | and get non-empty parts
+        parts = [p.strip() for p in stripped.split('|') if p.strip()]
+        if len(parts) >= 2:
+            content.append(f'| {parts[0]} | {parts[1]} |\n')
+
+        i += 1
+
+    return content, i
+
+
+def convert_console_to_markdown(input_dir: str, output_file: Optional[str] = None) -> str:
+    """Convert Yetus console output to Markdown format."""
+    input_path = Path(input_dir)
+
+    if not input_path.is_dir():
+        print(f'Error: Input path "{input_dir}" is not a directory', file=sys.stderr)
+        sys.exit(1)
+
+    console_file = input_path / 'console.txt'
+    if not console_file.exists():
+        print(f'Error: console.txt not found in "{input_dir}"', file=sys.stderr)
+        sys.exit(1)
+
+    with open(console_file, 'r') as f:
+        lines = f.readlines()
+
+    content = []
+    i = 0
+
+    while i < len(lines):
+        line = lines[i]
+        stripped = line.strip()
+
+        if stripped == '-1 overall':
+            content.append(f'<pre>❌ {stripped}</pre>\n')
+            i += 1
+        elif stripped == '+1 overall':
+            content.append(f'<pre>✅ {stripped}</pre>\n')
+            i += 1
+        elif '| Vote |' in line and 'Subsystem' in line:
+            table_content, i = process_first_table(lines, i + 1)
+            content.extend(table_content)
+
+            counts, details = aggregate_failed_tests(input_path)
+            if counts:
+                content.extend(generate_failed_tests_table(counts, details))
+        elif '|| Subsystem || Report/Notes ||' in line:
+            table_content, i = process_second_table(lines, i + 1)
+            content.extend(table_content)
+        else:
+            i += 1
+
+    result = ''.join(content)
+
+    if output_file:
+        with open(output_file, 'w') as f:
+            f.write(result)
+        print(f'Converted {input_dir} to {output_file}', file=sys.stderr)
+    else:
+        print(result, end='')
+
+    return result
+
+
+def main():
+    if len(sys.argv) < 2:
+        print(f'Usage: {sys.argv[0]} <input_directory> [output_file]', file=sys.stderr)
+        print(
+            f'  input_directory: Directory containing console.txt and optional patch-unit-*.txt files',
+            file=sys.stderr)
+        print(f'  If output_file is not provided, output goes to stdout', file=sys.stderr)
+        sys.exit(1)
+
+    input_dir = sys.argv[1]
+    output_file = sys.argv[2] if len(sys.argv) > 2 else None
+
+    if not Path(input_dir).exists():
+        print(f'Error: Input directory "{input_dir}" does not exist', file=sys.stderr)
+        sys.exit(1)
+
+    convert_console_to_markdown(input_dir, output_file)
+
+
+if __name__ == '__main__':
+    main()
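+
+# Typical invocation (paths are examples): point the script at a Yetus output directory that
+# holds console.txt and optional patch-unit-*.txt files, writing Markdown to stdout or a file:
+#   python3 dev-support/yetus_console_to_md.py /path/to/yetus-out
+#   python3 dev-support/yetus_console_to_md.py /path/to/yetus-out report.md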