hbase 源代码分析 (19) HMaster 启动负载均衡过程分析

流程
1)balancer由master启动
2)默认balancer 是StochasticLoadBalancer
/**
 * Returns the load balancer class used by default.
 *
 * @return {@code StochasticLoadBalancer.class}
 */
public static Class<? extends LoadBalancer> getDefaultLoadBalancerClass() {
  final Class<? extends LoadBalancer> defaultBalancerClass = StochasticLoadBalancer.class;
  return defaultBalancerClass;
}
3)balancer由AssignmentManager管理
this.assignmentManager = new AssignmentManager(this, serverManager,
  this.balancer, this.service, this.metricsMaster,
  this.tableLockManager);
4)集群状态守护进程ClusterStatusChore不断更新balancer里的集群信息,默认每1分钟执行一次。
this.clusterStatusChore = new ClusterStatusChore(this, balancer);
/**
 * Chore body: reads the master's current cluster status and hands it to the balancer.
 * An {@code InterruptedIOException} is logged at WARN level and otherwise ignored.
 */
protected void chore() {
  try {
    balancer.setClusterStatus(master.getClusterStatus());
  } catch (InterruptedIOException e) {
    LOG.warn("Ignoring interruption", e);
  }
}
5)balancer自己的守护进程BalancerChore
默认每5分钟(配置项hbase.balancer.period,默认值300000毫秒)调用一次master.balance()进行均衡
/**
 * Creates the balancer chore for the given master.
 *
 * The chore name is the master's server name followed by "-BalancerChore". The third
 * superclass argument is read from the "hbase.balancer.period" configuration key and
 * falls back to 300000 when the key is unset (presumably the run period in
 * milliseconds -- confirm against the superclass constructor).
 *
 * @param master the master this chore keeps a reference to
 */
public BalancerChore(HMaster master) {
  super(master.getServerName() + "-BalancerChore", master, master.getConfiguration().getInt(
    "hbase.balancer.period", 300000));
  this.master = master;
}

/**
 * Chore body: asks the master to run a balance.
 * An {@code IOException} is logged at ERROR level and not rethrown.
 */
@Override
protected void chore() {
  try {
    master.balance();
  } catch (IOException e) {
    LOG.error("Failed to balance.", e);
  }
}

6)均衡如下:
   
   
  // Excerpt (abridged by the author): asks the balancer for region plans table by table,
  // then passes every plan to the assignment manager.
  1. public boolean balance(boolean force) throws IOException {
  2. // [Elided by the author] Mainly the checks on whether a balance run is allowed at all, e.g. not while a server is dead and not right after a new start.
  3. int maximumBalanceTime = getBalancerCutoffTime();
  4. // Fetch, per table, the map from server name to the HRegionInfo list it holds: which servers host the table and which of its regions sit on each of them.
  5. Map<TableName, Map<ServerName, List<HRegionInfo>>> assignmentsByTable =
  6. this.assignmentManager.getRegionStates().getAssignmentsByTable();
  7. List<RegionPlan> plans = new ArrayList<RegionPlan>();
  8. //Give the balancer the current cluster state.
  9. this.balancer.setClusterStatus(getClusterStatus());
  10.     // Generate the plans one table at a time.
  11. for (Entry<TableName, Map<ServerName, List<HRegionInfo>>> e : assignmentsByTable.entrySet()) {
  12. List<RegionPlan> partialPlans = this.balancer.balanceCluster(e.getKey(), e.getValue());
  13. if (partialPlans != null) plans.addAll(partialPlans);
  14. }
  15. long cutoffTime = System.currentTimeMillis() + maximumBalanceTime;
  16. int rpCount = 0; // number of RegionPlans balanced so far
  17. long totalRegPlanExecTime = 0;
  18. if (plans != null && !plans.isEmpty()) {
  19. for (RegionPlan plan: plans) {
  20. LOG.info("balance " + plan);
  21. long balStartTime = System.currentTimeMillis();
  22. //TODO: bulk assign
  23. // Execute the plan.
  24. this.assignmentManager.balance(plan);
  25. }
  26. //.....
  27. .....
  28. }

7)生成计划过程
   
   
  // Computes region plans for the given cluster state with a stochastic walk: random
  // candidate actions are applied and kept only when they lower the computed cost.
  // Returns null when no balancing is needed or no cheaper layout was found.
  1. @Override
  2. public synchronized List<RegionPlan> balanceCluster(Map<ServerName,
  3. List<HRegionInfo>> clusterState) {
  4. // First move regions of tables that belong on the master's region server, and those that sit there but should not.
  5. // Criterion (author's note): the table is listed in tablesOnMaster and the region is the default replica:
  6. //return tablesOnMaster.contains(region.getTable().getNameAsString())
  7. // && region.getReplicaId() == HRegionInfo.DEFAULT_REPLICA_ID;
  1. List<RegionPlan> plans = balanceMasterRegions(clusterState);
  2. if (plans != null || clusterState == null || clusterState.size() <= 1) {
  3. return plans;
  4. }
  5. if (masterServerName != null && clusterState.containsKey(masterServerName)) {
  6. if (clusterState.size() <= 2) {
  7. return null;
  8. }
  9. clusterState = new HashMap<ServerName, List<HRegionInfo>>(clusterState);
  10. clusterState.remove(masterServerName);
  11. }
  12. RegionLocationFinder finder = null;
  13. if (this.localityCost != null && this.localityCost.getMultiplier() > 0) {
  14. finder = this.regionFinder;
  15. }
  16. // Build a Cluster model from the whole cluster state.
  17. Cluster cluster = new Cluster(clusterState, loads, finder, rackManager);
  18. // Decide whether balancing is needed at all.
  19. // Criterion (author's note): whether the region count per region server is already even; needsBalance is not shown here and presumably uses:
  20. //float average = cs.getLoadAverage(); // for logging
    //int floor = (int) Math.floor(average * (1 - slop));
    //int ceiling = (int) Math.ceil(average * (1 + slop));
  21. if (!needsBalance(cluster)) {
  22. return null;
  23. }
  24. // Balancing is needed, so compute the cost of the current layout as the baseline.
  25. long startTime = EnvironmentEdgeManager.currentTime();
  26. initCosts(cluster);
  27. double currentCost = computeCost(cluster, Double.MAX_VALUE);
  28. curOverallCost = currentCost;
  29. for (int i = 0; i < this.curFunctionCosts.length; i++) {
  30. curFunctionCosts[i] = tempFunctionCosts[i];
  31. }
  32. double initCost = currentCost;
  33. double newCost = currentCost;
  34. // Cap the walk at min(maxSteps, numRegions * stepsPerRegion * numServers); the author cites stepsPerRegion = 800 -- TODO confirm against the configured default.
  35. long computedMaxSteps = Math.min(this.maxSteps,
  36. ((long)cluster.numRegions * (long)this.stepsPerRegion * (long)cluster.numServers));
  37. // Perform a stochastic walk to see if we can get a good fit.
  38. long step;
  39. for (step = 0; step < computedMaxSteps; step++) {
  40. int generatorIdx = RANDOM.nextInt(candidateGenerators.length);
  41. CandidateGenerator p = candidateGenerators[generatorIdx];
  42. Cluster.Action action = p.generate(cluster);
  43. if (action.type == Type.NULL) {
  44. continue;
  45. }
  46. cluster.doAction(action);
  47. updateCostsWithAction(cluster, action);
  48. newCost = computeCost(cluster, currentCost);
  49. // Should this be kept?
  50. if (newCost < currentCost) {
  51. currentCost = newCost;
  52. // save for JMX
  53. curOverallCost = currentCost;
  54. for (int i = 0; i < this.curFunctionCosts.length; i++) {
  55. curFunctionCosts[i] = tempFunctionCosts[i];
  56. }
  57. } else {
  58. // Put things back the way they were before.
  59. // TODO: undo by remembering old values
  60. Action undoAction = action.undoAction();
  61. cluster.doAction(undoAction);
  62. updateCostsWithAction(cluster, undoAction);
  63. }
  64. if (EnvironmentEdgeManager.currentTime() - startTime >
  65. maxRunningTime) {
  66. break;
  67. }
  68. }
  69. long endTime = EnvironmentEdgeManager.currentTime();
  70. metricsBalancer.balanceCluster(endTime - startTime);
  71. // update costs metrics
  72. updateStochasticCosts(tableName, curOverallCost, curFunctionCosts);
  73. // After the walk, create the region plans only if the cost actually went down.
  74. if (initCost > currentCost) {
  75. plans = createRegionPlans(cluster);
  76. if (LOG.isDebugEnabled()) {
  77. LOG.debug("Finished computing new load balance plan. Computation took "
  78. + (endTime - startTime) + "ms to try " + step
  79. + " different iterations. Found a solution that moves "
  80. + plans.size() + " regions; Going from a computed cost of "
  81. + initCost + " to a new cost of " + currentCost);
  82. }
  83. return plans;
  84. }
  85. if (LOG.isDebugEnabled()) {
  86. LOG.debug("Could not find a better load balance plan. Tried "
  87. + step + " different configurations in " + (endTime - startTime)
  88. + "ms, and did not find anything with a computed cost less than " + initCost);
  89. }
  90. return null;
  91. }
8)执行计划(AssignmentManager的assign方法)
   
   
  // Excerpt (abridged by the author; the retry loop header and several declarations appear
  // to be omitted): assigns a region by sending an open request to the plan's destination
  // server and asks for a new plan when the open fails.
  1. public void assign(RegionState state,
  2. boolean setOfflineInZK, final boolean forceNewPlan) {
  3. long startTime = EnvironmentEdgeManager.currentTime();
  4. try {
  5. HRegionInfo region = state.getRegion();
  6. // Update the region state to PENDING_OPEN for the plan's destination.
  7. currentState = regionStates.updateRegionState(region,
  8. State.PENDING_OPEN, plan.getDestination());
  9. boolean needNewPlan;
  10. final String assignMsg = "Failed assignment of " + region.getRegionNameAsString() +
  11. " to " + plan.getDestination();
  12. try {
  13. List<ServerName> favoredNodes = ServerName.EMPTY_SERVER_LIST;
  14. if (this.shouldAssignRegionsWithFavoredNodes) {
  15. favoredNodes = ((FavoredNodeLoadBalancer)this.balancer).getFavoredNodes(region);
  16. }
  17. // Actually send the open-region request to the destination server.
  18. regionOpenState = serverManager.sendRegionOpen(
  19. plan.getDestination(), region, versionOfOfflineNode, favoredNodes);
  20. // The open failed, so another attempt is needed.
  21. if (regionOpenState == RegionOpeningState.FAILED_OPENING) {
  22. // Failed opening this region, looping again on a new server.
  23. needNewPlan = true;
  24. LOG.warn(assignMsg + ", regionserver says 'FAILED_OPENING', " +
  25. " trying to assign elsewhere instead; " +
  26. "try=" + i + " of " + this.maximumAttempts);
  27. } else {
  28. // Finished.
  29. // we're done
  30. if (regionOpenState == RegionOpeningState.ALREADY_OPENED) {
  31. processAlreadyOpenedRegion(region, plan.getDestination());
  32. }
  33. return;
  34. }
  35. }
  36. // The meta region must keep being retried until it succeeds; it is not allowed to fail.
  37. if (i == this.maximumAttempts) {
  38. // For meta region, we have to keep retrying until succeeding
  39. if (region.isMetaRegion()) {
  40. waitForRetryingMetaAssignment();
  41. }
  42. else {
  43. // Don't reset the region state or get a new plan any more.
  44. // This is the last try.
  45. continue;
  46. }
  47. }
  48. if (needNewPlan) {
  49. RegionPlan newPlan = null;
  50. try {
  51. newPlan = getRegionPlan(region, true);
  52. } catch (HBaseIOException e) {
  53. LOG.warn("Failed to get region plan", e);
  54. }
  55. if (newPlan == null) {
  56. regionStates.updateRegionState(region, State.FAILED_OPEN);
  57. LOG.warn("Unable to find a viable location to assign region " +
  58. region.getRegionNameAsString());
  59. return;
  60. }
  61. if (plan != newPlan && !plan.getDestination().equals(newPlan.getDestination())) {
  62. // Clean out plan we failed execute and one that doesn't look like it'll
  63. // succeed anyways; we need a new plan!
  64. // Transition back to OFFLINE
  65. LOG.info("Region assignment plan changed from " + plan.getDestination() + " to "
  66. + newPlan.getDestination() + " server.");
  67. currentState = regionStates.updateRegionState(region, State.OFFLINE);
  68. versionOfOfflineNode = -1;
  69. if (useZKForAssignment) {
  70. setOfflineInZK = true;
  71. }
  72. plan = newPlan;
  73. } else if(plan.getDestination().equals(newPlan.getDestination()) &&
  74. previousException instanceof FailedServerException) {
  75. }
  76. }
  77. }
  78. // Run out of attempts
  79. regionStates.updateRegionState(region, State.FAILED_OPEN);
  80. } finally {
  81. metricsAssignmentManager.updateAssignmentTime(EnvironmentEdgeManager.currentTime() - startTime);
  82. }
  83. }
到此结束

评论
添加红包

请填写红包祝福语或标题

红包个数最小为10个

红包金额最低5元

当前余额3.43前往充值 >
需支付:10.00
成就一亿技术人!
领取后你会自动成为博主和红包主的粉丝 规则
hope_wisdom
发出的红包
实付
使用余额支付
点击重新获取
扫码支付
钱包余额 0

抵扣说明:

1.余额是钱包充值的虚拟货币,按照1:1的比例进行支付金额的抵扣。
2.余额无法直接购买下载,可以购买VIP、付费专栏及课程。

余额充值