Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
122 changes: 64 additions & 58 deletions hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java
Original file line number Diff line number Diff line change
Expand Up @@ -2036,80 +2036,86 @@ public BalanceResponse balance(BalanceRequest request) throws IOException {
}

synchronized (this.balancer) {
// Only allow one balance run at a time.
if (this.assignmentManager.getRegionTransitScheduledCount() > 0) {
List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
// if hbase:meta region is in transition, result of assignment cannot be recorded
// ignore the force flag in that case
boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
List<RegionStateNode> toPrint = regionsInTransition;
int max = 5;
boolean truncated = false;
if (regionsInTransition.size() > max) {
toPrint = regionsInTransition.subList(0, max);
truncated = true;
}
try {
this.balancer.onBalancingStart();

// Only allow one balance run at a time.
if (this.assignmentManager.getRegionTransitScheduledCount() > 0) {
List<RegionStateNode> regionsInTransition = assignmentManager.getRegionsInTransition();
// if hbase:meta region is in transition, result of assignment cannot be recorded
// ignore the force flag in that case
boolean metaInTransition = assignmentManager.isMetaRegionInTransition();
List<RegionStateNode> toPrint = regionsInTransition;
int max = 5;
boolean truncated = false;
if (regionsInTransition.size() > max) {
toPrint = regionsInTransition.subList(0, max);
truncated = true;
}

if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
+ ") because " + assignmentManager.getRegionTransitScheduledCount()
+ " region(s) are scheduled to transit " + toPrint
+ (truncated ? "(truncated list)" : ""));
if (!request.isIgnoreRegionsInTransition() || metaInTransition) {
LOG.info("Not running balancer (ignoreRIT=false" + ", metaRIT=" + metaInTransition
+ ") because " + assignmentManager.getRegionTransitScheduledCount()
+ " region(s) are scheduled to transit " + toPrint
+ (truncated ? "(truncated list)" : ""));
return responseBuilder.build();
}
}
if (this.serverManager.areDeadServersInProgress()) {
LOG.info("Not running balancer because processing dead regionserver(s): "
+ this.serverManager.getDeadServers());
return responseBuilder.build();
}
}
if (this.serverManager.areDeadServersInProgress()) {
LOG.info("Not running balancer because processing dead regionserver(s): "
+ this.serverManager.getDeadServers());
return responseBuilder.build();
}

if (this.cpHost != null) {
try {
if (this.cpHost.preBalance(request)) {
LOG.debug("Coprocessor bypassing balancer request");
if (this.cpHost != null) {
try {
if (this.cpHost.preBalance(request)) {
LOG.debug("Coprocessor bypassing balancer request");
return responseBuilder.build();
}
} catch (IOException ioe) {
LOG.error("Error invoking master coprocessor preBalance()", ioe);
return responseBuilder.build();
}
} catch (IOException ioe) {
LOG.error("Error invoking master coprocessor preBalance()", ioe);
return responseBuilder.build();
}
}

Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
this.assignmentManager.getRegionStates().getAssignmentsForBalancer(tableStateManager,
this.serverManager.getOnlineServersList());
for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
}
Map<TableName, Map<ServerName, List<RegionInfo>>> assignments =
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I can see a whitespace change here in the diff. Did you fix the formatting here and on the next few lines?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess this is because of the introduction of the try block. I ran "mvn spotless:apply" to fix the formatting.

this.assignmentManager.getRegionStates().getAssignmentsForBalancer(tableStateManager,
this.serverManager.getOnlineServersList());
for (Map<ServerName, List<RegionInfo>> serverMap : assignments.values()) {
serverMap.keySet().removeAll(this.serverManager.getDrainingServersList());
}

// Give the balancer the current cluster state.
this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());
// Give the balancer the current cluster state.
this.balancer.updateClusterMetrics(getClusterMetricsWithoutCoprocessor());

List<RegionPlan> plans = this.balancer.balanceCluster(assignments);
List<RegionPlan> plans = this.balancer.balanceCluster(assignments);

responseBuilder.setBalancerRan(true).setMovesCalculated(plans == null ? 0 : plans.size());
responseBuilder.setBalancerRan(true).setMovesCalculated(plans == null ? 0 : plans.size());

if (skipRegionManagementAction("balancer")) {
// make one last check that the cluster isn't shutting down before proceeding.
return responseBuilder.build();
}
if (skipRegionManagementAction("balancer")) {
// make one last check that the cluster isn't shutting down before proceeding.
return responseBuilder.build();
}

// For dry run we don't actually want to execute the moves, but we do want
// to execute the coprocessor below
List<RegionPlan> sucRPs =
request.isDryRun() ? Collections.emptyList() : executeRegionPlansWithThrottling(plans);
// For dry run we don't actually want to execute the moves, but we do want
// to execute the coprocessor below
List<RegionPlan> sucRPs =
request.isDryRun() ? Collections.emptyList() : executeRegionPlansWithThrottling(plans);

if (this.cpHost != null) {
try {
this.cpHost.postBalance(request, sucRPs);
} catch (IOException ioe) {
// balancing already succeeded so don't change the result
LOG.error("Error invoking master coprocessor postBalance()", ioe);
if (this.cpHost != null) {
try {
this.cpHost.postBalance(request, sucRPs);
} catch (IOException ioe) {
// balancing already succeeded so don't change the result
LOG.error("Error invoking master coprocessor postBalance()", ioe);
}
}
}

responseBuilder.setMovesExecuted(sucRPs.size());
responseBuilder.setMovesExecuted(sucRPs.size());
} finally {
this.balancer.onBalancingComplete();
}
}

// If LoadBalancer did not generate any plans, it means the cluster is already balanced.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -188,6 +188,14 @@ default void throttle(RegionPlan plan) throws Exception {
// noop
}

/**
 * Hook called when a balance run is about to start. {@code HMaster#balance} invokes it as the
 * first statement of its try block, while holding the balancer's monitor and before any of the
 * "should the balancer run" checks. The default implementation does nothing.
 */
default void onBalancingStart() {
// noop
}

/**
 * Hook called when a balance run has finished. {@code HMaster#balance} invokes it from a
 * {@code finally} block, so it also runs when the balance run returns early or throws. The
 * default implementation does nothing.
 */
default void onBalancingComplete() {
// noop
}

/**
* @return true if Master carries regions
* @deprecated since 2.4.0, will be removed in 3.0.0.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,8 @@
import java.util.List;
import java.util.Map;
import java.util.Optional;
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicReference;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.ClusterMetrics;
import org.apache.hadoop.hbase.RegionMetrics;
Expand Down Expand Up @@ -64,13 +66,36 @@ public class CacheAwareLoadBalancer extends StochasticLoadBalancer {
private Long sleepTime;
private Configuration configuration;

/**
 * Tracks whether a balance run is currently in progress. Set to {@code true} by
 * {@link #onBalancingStart()} and back to {@code false} by {@link #onBalancingComplete()}.
 */
private final AtomicBoolean isBalancing = new AtomicBoolean(false);

/**
 * Holds a configuration update that arrived while a balance run was in progress. A {@code null}
 * value inside the reference means nothing is pending. A newer deferred configuration overwrites
 * an older one, so only the most recent update is kept. The holder itself is never replaced,
 * hence it is declared {@code final} like {@link #isBalancing}.
 */
private final AtomicReference<Configuration> pendingConfiguration = new AtomicReference<>();

/**
 * Identifies a kind of generator function.
 * <p>
 * NOTE(review): the code that consumes this enum is not visible here; presumably {@code LOAD}
 * selects load-based and {@code CACHE_RATIO} cache-ratio-based candidate generation -- confirm
 * against the candidate generator setup.
 */
public enum GeneratorFunctionType {
LOAD,
CACHE_RATIO
}

@Override
public synchronized void loadConf(Configuration configuration) {
public void loadConf(Configuration configuration) {
  // Applies the configuration right away when no balance run is active. While a run is in
  // progress the configuration is parked in pendingConfiguration instead, so that the running
  // balance is not reconfigured midway and the caller does not block.
  if (!isBalancing.get()) {
    // Apply configuration change immediately.
    updateConfiguration(configuration);
    return;
  }

  LOG.debug(
    "Balance is in progress, defer applying configuration change until balance completed.");
  pendingConfiguration.set(configuration);

  // The balance run may have completed, and drained an still-empty pendingConfiguration, between
  // the isBalancing check above and the set() just performed. Without a second look the
  // configuration stored here would stay unapplied until some later balance run finishes.
  // Re-check the flag and drain the pending value ourselves if the run is already over;
  // getAndSet(null) ensures a given pending configuration is applied by one thread only.
  if (!isBalancing.get()) {
    Configuration toApply = pendingConfiguration.getAndSet(null);
    if (toApply != null) {
      LOG.info("Applying pending configuration after balance completed.");
      updateConfiguration(toApply);
    }
  }
}

public void updateConfiguration(Configuration configuration) {
this.configuration = configuration;
this.costFunctions = new ArrayList<>();
super.loadConf(configuration);
Expand All @@ -79,6 +104,38 @@ public synchronized void loadConf(Configuration configuration) {
sleepTime = configuration.getLong(MOVE_THROTTLING, MOVE_THROTTLING_DEFAULT.toMillis());
}

/**
 * Marks the start of a balance run by setting {@link #isBalancing} to {@code true}. While the
 * flag is set, {@link #loadConf(Configuration)} stores incoming configurations in
 * {@link #pendingConfiguration} instead of applying them.
 */
@Override
public void onBalancingStart() {
LOG.debug("Setting isBalancing to true as balance is starting");
isBalancing.set(true);
}

/**
 * Marks the end of a balance run: sets {@link #isBalancing} to {@code false} and then applies
 * any configuration that was deferred into {@link #pendingConfiguration} during the run.
 * <p>
 * The flag is cleared before the pending configuration is drained, so a
 * {@link #loadConf(Configuration)} call that reads the flag after this point applies its
 * configuration directly.
 * <p>
 * NOTE(review): a {@code loadConf} caller that read the flag as {@code true} just before it is
 * cleared here may store its configuration only after the drain below has run; confirm that
 * {@code loadConf} accounts for that window.
 */
@Override
public void onBalancingComplete() {
LOG.debug("Setting isBalancing to false as balance is completed");
// Clear the flag first so that new loadConf calls stop deferring.
isBalancing.set(false);
applyPendingConfiguration();
}

/**
 * Drains {@link #pendingConfiguration} and, when a configuration had been deferred, applies it
 * through {@link #updateConfiguration(Configuration)}. The pending reference is swapped to
 * {@code null} in the same atomic step that reads it, so a deferred configuration is handed to
 * at most one caller. Does nothing when no configuration is pending.
 */
public void applyPendingConfiguration() {
  final Configuration deferred = pendingConfiguration.getAndSet(null);
  if (deferred == null) {
    // Nothing arrived while balancing.
    return;
  }
  LOG.info("Applying pending configuration after balance completed.");
  updateConfiguration(deferred);
}

@Override
protected Map<Class<? extends CandidateGenerator>, CandidateGenerator>
createCandidateGenerators(Configuration conf) {
Expand Down Expand Up @@ -193,10 +250,13 @@ public void throttle(RegionPlan plan) {
+ "Throttling move for {}ms.",
plan.getRegionInfo().getEncodedName(), plan.getDestination(), sleepTime);
}
try {
Thread.sleep(sleepTime);
} catch (InterruptedException e) {
throw new RuntimeException(e);
synchronized (this) {
try {
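// Release this object's monitor while waiting so that other threads synchronizing on the
// balancer are not blocked for the duration of the throttle.
// NOTE(review): unlike Thread.sleep(0), wait(0) blocks until notified, so a sleepTime of 0
// must not reach this call -- confirm how MOVE_THROTTLING may be configured.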
wait(sleepTime);
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Add a comment line here to explain that it's giving up the monitor.

} catch (InterruptedException e) {
throw new RuntimeException(e);
}
}
}
}
Expand Down
Loading