Skip to content

HBASE-29356 Incorrect split behavior when region information is missing #7035

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 1 commit into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -307,11 +307,11 @@ private boolean proceedWithMergePlanning(TableDescriptor tableDescriptor) {
*/
private double getAverageRegionSizeMb(final List<RegionInfo> tableRegions,
final TableDescriptor tableDescriptor) {
TableName table = tableDescriptor.getTableName();
if (isEmpty(tableRegions)) {
throw new IllegalStateException(
"Cannot calculate average size of a table without any regions.");
throw new IllegalStateException("Cannot calculate the average size of regions in the table "
+ table + " without any regions.");
}
TableName table = tableDescriptor.getTableName();
double avgRegionSize;
int targetRegionCount = tableDescriptor.getNormalizerTargetRegionCount();
long targetRegionSize = tableDescriptor.getNormalizerTargetRegionSize();
Expand All @@ -322,14 +322,39 @@ private double getAverageRegionSizeMb(final List<RegionInfo> tableRegions,
avgRegionSize = targetRegionSize;
} else {
final int regionCount = tableRegions.size();
final long totalSizeMb = tableRegions.stream().mapToLong(this::getRegionSizeMB).sum();
// Count of regions whose size is known
int regionCountKnownSize = 0;
long totalSizeMb = 0;
for (RegionInfo regionInfo : tableRegions) {
long regionSize = getRegionSizeMB(regionInfo);
if (regionSize != -1) {
totalSizeMb += regionSize;
regionCountKnownSize++;
}
}
if (targetRegionCount > 0) {
avgRegionSize = totalSizeMb / (double) targetRegionCount;
if (targetRegionCount <= (regionCount - regionCountKnownSize)) {
throw new IllegalStateException(
"Cannot calculate the average size of regions in the table " + table
+ ". Target region count " + targetRegionCount
+ " must be greater than the number of regions with unknown size "
+ (regionCount - regionCountKnownSize) + ".");
}
avgRegionSize =
totalSizeMb / (double) (targetRegionCount - (regionCount - regionCountKnownSize));
} else {
avgRegionSize = totalSizeMb / (double) regionCount;
if (regionCountKnownSize == 0) {
throw new IllegalStateException(
"Cannot calculate the average size of regions in the table " + table
+ " without any regions of known size.");
}
avgRegionSize = totalSizeMb / (double) regionCountKnownSize;
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think that you also need to protect against a 0 value here, in the same way.

Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I added the condition, including the test modification.

}
LOG.debug("Table {}, total aggregated regions size: {} MB and average region size {} MB",
table, totalSizeMb, String.format("%.3f", avgRegionSize));
LOG.debug(
"Table {}, total aggregated regions size: {} MB and average region size {} MB, "
+ "number of regions with unknown size {}",
table, totalSizeMb, String.format("%.3f", avgRegionSize),
regionCount - regionCountKnownSize);
}

return avgRegionSize;
Expand Down Expand Up @@ -393,6 +418,13 @@ private List<NormalizationPlan> computeMergeNormalizationPlans(final NormalizeCo
for (current = rangeStart; current < ctx.getTableRegions().size(); current++) {
final RegionInfo regionInfo = ctx.getTableRegions().get(current);
final long regionSizeMb = getRegionSizeMB(regionInfo);
if (regionSizeMb == -1) {
LOG.debug(
"For region {} in table {} cannot determine size, skipping check merging region",
regionInfo.getRegionNameAsString(), ctx.getTableName());
rangeStart = Math.max(current, rangeStart + 1);
continue;
}
if (skipForMerge(configuration, ctx, regionInfo)) {
// this region cannot participate in a range. resume the outer loop.
rangeStart = Math.max(current, rangeStart + 1);
Expand Down Expand Up @@ -457,6 +489,12 @@ private List<NormalizationPlan> computeSplitNormalizationPlans(final NormalizeCo
continue;
}
final long regionSizeMb = getRegionSizeMB(hri);
if (regionSizeMb == -1) {
LOG.debug(
"For region {} in table {} cannot determine size, skipping check splitting region",
hri.getRegionNameAsString(), ctx.getTableName());
continue;
}
if (regionSizeMb > 2 * avgRegionSize) {
LOG.info(
"Table {}, large region {} has size {} MB, more than twice avg size {} MB, "
Expand Down Expand Up @@ -490,7 +528,12 @@ private static boolean isOldEnoughForMerge(final NormalizerConfiguration normali
*/
private boolean isLargeEnoughForMerge(final NormalizerConfiguration normalizerConfiguration,
final NormalizeContext ctx, final RegionInfo regionInfo) {
return getRegionSizeMB(regionInfo) >= normalizerConfiguration.getMergeMinRegionSizeMb(ctx);
long regionSizeMb = getRegionSizeMB(regionInfo);
if (regionSizeMb == -1) {
return false;
} else {
return regionSizeMb >= normalizerConfiguration.getMergeMinRegionSizeMb(ctx);
}
}

/**
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
import org.junit.Rule;
import org.junit.Test;
import org.junit.experimental.categories.Category;
import org.junit.rules.ExpectedException;
import org.mockito.Mockito;

/**
Expand All @@ -96,6 +97,9 @@ public class TestSimpleRegionNormalizer {
@Rule
public TableNameTestRule name = new TableNameTestRule();

@Rule
public ExpectedException thrown = ExpectedException.none();

@Before
public void before() {
conf = HBaseConfiguration.create();
Expand Down Expand Up @@ -676,13 +680,27 @@ private void setupMocksForNormalizer(Map<byte[], Integer> regionSizes,
ServerName sn = ServerName.valueOf("localhost", 0, 0L);
when(masterServices.getAssignmentManager().getRegionStates().getRegionsOfTable(any()))
.thenReturn(regionInfoList);
when(masterServices.getAssignmentManager().getRegionStates().getRegionServerOfRegion(any()))
.thenReturn(sn);
for (RegionInfo regionInfo : regionInfoList) {
int regionSize = regionSizes.get(regionInfo.getRegionName());
// ensures that the getRegionSizeMB method returns -1, simulating the condition when the
// region size cannot be found
if (regionSize == -1) {
when(masterServices.getAssignmentManager().getRegionStates()
.getRegionServerOfRegion(regionInfo)).thenReturn(null);
} else {
when(masterServices.getAssignmentManager().getRegionStates()
.getRegionServerOfRegion(regionInfo)).thenReturn(sn);
}
}
when(
masterServices.getAssignmentManager().getRegionStates().getRegionState(any(RegionInfo.class)))
.thenReturn(RegionState.createForTesting(null, RegionState.State.OPEN));

for (Map.Entry<byte[], Integer> region : regionSizes.entrySet()) {
// skip regions where we don't know the size
if (region.getValue() == -1) {
continue;
}
RegionMetrics regionLoad = Mockito.mock(RegionMetrics.class);
when(regionLoad.getRegionName()).thenReturn(region.getKey());
when(regionLoad.getStoreFileSize())
Expand Down Expand Up @@ -757,4 +775,42 @@ private static Map<byte[], Integer> createRegionSizesMap(final List<RegionInfo>
}
return ret;
}

@Test
public void testSplitOfLargeRegionIfOneIsNotKnow() {
final TableName tableName = name.getTableName();
final List<RegionInfo> regionInfos = createRegionInfos(tableName, 5);
final Map<byte[], Integer> regionSizes = createRegionSizesMap(regionInfos, 8, 6, -1, 10, 30);

setupMocksForNormalizer(regionSizes, regionInfos);

assertThat(normalizer.computePlansForTable(tableDescriptor),
contains(new SplitNormalizationPlan(regionInfos.get(4), 30)));
}

@Test
public void testSplitOfAllUnknownSize() {
final TableName tableName = name.getTableName();
final List<RegionInfo> regionInfos = createRegionInfos(tableName, 4);
final Map<byte[], Integer> regionSizes = createRegionSizesMap(regionInfos, -1, -1, -1, -1);
thrown.expect(IllegalStateException.class);
thrown.expectMessage("Cannot calculate the average size of regions in the table " + tableName
+ " without any regions of known size.");
setupMocksForNormalizer(regionSizes, regionInfos);
normalizer.computePlansForTable(tableDescriptor);
}

@Test
public void testThrowsWhenTargetRegionCountLessThanUnknownSizeRegions() {
final TableName tableName = name.getTableName();
final List<RegionInfo> regionInfos = createRegionInfos(tableName, 4);
final Map<byte[], Integer> regionSizes = createRegionSizesMap(regionInfos, -1, -1, 10, 10);
thrown.expect(IllegalStateException.class);
thrown.expectMessage("Cannot calculate the average size of regions in the table " + tableName
+ ". Target region count 1 must be greater than the number of regions with unknown size 2.");
setupMocksForNormalizer(regionSizes, regionInfos);
when(tableDescriptor.getNormalizerTargetRegionCount()).thenReturn(1);
normalizer.computePlansForTable(tableDescriptor);
}

}