001/* 002 * Licensed to the Apache Software Foundation (ASF) under one 003 * or more contributor license agreements. See the NOTICE file 004 * distributed with this work for additional information 005 * regarding copyright ownership. The ASF licenses this file 006 * to you under the Apache License, Version 2.0 (the 007 * "License"); you may not use this file except in compliance 008 * with the License. You may obtain a copy of the License at 009 * 010 * http://www.apache.org/licenses/LICENSE-2.0 011 * 012 * Unless required by applicable law or agreed to in writing, software 013 * distributed under the License is distributed on an "AS IS" BASIS, 014 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 015 * See the License for the specific language governing permissions and 016 * limitations under the License. 017 */ 018package org.apache.hadoop.hbase.util; 019 020import java.io.BufferedInputStream; 021import java.io.BufferedOutputStream; 022import java.io.Closeable; 023import java.io.DataInputStream; 024import java.io.DataOutputStream; 025import java.io.File; 026import java.io.FileInputStream; 027import java.io.FileOutputStream; 028import java.io.IOException; 029import java.nio.file.Files; 030import java.nio.file.Paths; 031import java.util.ArrayList; 032import java.util.Arrays; 033import java.util.Collections; 034import java.util.EnumSet; 035import java.util.HashSet; 036import java.util.Iterator; 037import java.util.List; 038import java.util.Locale; 039import java.util.Optional; 040import java.util.Set; 041import java.util.concurrent.Callable; 042import java.util.concurrent.CancellationException; 043import java.util.concurrent.ExecutionException; 044import java.util.concurrent.ExecutorService; 045import java.util.concurrent.Executors; 046import java.util.concurrent.Future; 047import java.util.concurrent.TimeUnit; 048import java.util.concurrent.TimeoutException; 049import java.util.function.Predicate; 050import org.apache.commons.io.IOUtils; 051import org.apache.hadoop.conf.Configuration; 052import org.apache.hadoop.hbase.ClusterMetrics.Option; 053import org.apache.hadoop.hbase.HBaseConfiguration; 054import org.apache.hadoop.hbase.HConstants; 055import org.apache.hadoop.hbase.HRegionLocation; 056import org.apache.hadoop.hbase.MetaTableAccessor; 057import org.apache.hadoop.hbase.ServerName; 058import org.apache.hadoop.hbase.UnknownRegionException; 059import org.apache.hadoop.hbase.client.Admin; 060import org.apache.hadoop.hbase.client.Connection; 061import org.apache.hadoop.hbase.client.ConnectionFactory; 062import org.apache.hadoop.hbase.client.DoNotRetryRegionException; 063import org.apache.hadoop.hbase.client.RegionInfo; 064import org.apache.hadoop.hbase.client.RegionInfoBuilder; 065import org.apache.hadoop.hbase.client.Result; 066import org.apache.hadoop.hbase.master.RackManager; 067import org.apache.hadoop.hbase.master.RegionState; 068import org.apache.hadoop.hbase.master.assignment.AssignmentManager; 069import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; 070import org.apache.hadoop.hbase.zookeeper.ZKWatcher; 071import org.apache.hadoop.hbase.zookeeper.ZNodePaths; 072import org.apache.yetus.audience.InterfaceAudience; 073import org.slf4j.Logger; 074import org.slf4j.LoggerFactory; 075 076import org.apache.hbase.thirdparty.org.apache.commons.cli.CommandLine; 077import org.apache.hbase.thirdparty.org.apache.commons.collections4.CollectionUtils; 078 079/** 080 * Tool for loading/unloading regions to/from given regionserver This tool can be run from Command 081 * line directly as a utility. Supports Ack/No Ack mode for loading/unloading operations.Ack mode 082 * acknowledges if regions are online after movement while noAck mode is best effort mode that 083 * improves performance but will still move on if region is stuck/not moved. Motivation behind noAck 084 * mode being RS shutdown where even if a Region is stuck, upon shutdown master will move it 085 * anyways. This can also be used by constructiong an Object using the builder and then calling 086 * {@link #load()} or {@link #unload()} methods for the desired operations. 087 */ 088@InterfaceAudience.Public 089public class RegionMover extends AbstractHBaseTool implements Closeable { 090 public static final String MOVE_RETRIES_MAX_KEY = "hbase.move.retries.max"; 091 public static final String MOVE_WAIT_MAX_KEY = "hbase.move.wait.max"; 092 public static final String SERVERSTART_WAIT_MAX_KEY = "hbase.serverstart.wait.max"; 093 public static final int DEFAULT_MOVE_RETRIES_MAX = 5; 094 public static final int DEFAULT_MOVE_WAIT_MAX = 60; 095 public static final int DEFAULT_SERVERSTART_WAIT_MAX = 180; 096 097 private static final Logger LOG = LoggerFactory.getLogger(RegionMover.class); 098 099 private RegionMoverBuilder rmbuilder; 100 private boolean ack = true; 101 private int maxthreads = 1; 102 private int timeout; 103 private List<String> isolateRegionIdArray; 104 private String loadUnload; 105 private String hostname; 106 private String filename; 107 private String excludeFile; 108 private String designatedFile; 109 private int port; 110 private Connection conn; 111 private Admin admin; 112 private RackManager rackManager; 113 114 private RegionMover(RegionMoverBuilder builder) throws IOException { 115 this.hostname = builder.hostname; 116 this.filename = builder.filename; 117 this.excludeFile = builder.excludeFile; 118 this.designatedFile = builder.designatedFile; 119 this.maxthreads = builder.maxthreads; 120 this.isolateRegionIdArray = builder.isolateRegionIdArray; 121 this.ack = builder.ack; 122 this.port = builder.port; 123 this.timeout = builder.timeout; 124 setConf(builder.conf); 125 this.conn = ConnectionFactory.createConnection(conf); 126 this.admin = conn.getAdmin(); 127 // Only while running unit tests, builder.rackManager will not be null for the convenience of 128 // providing custom rackManager. Otherwise for regular workflow/user triggered action, 129 // builder.rackManager is supposed to be null. Hence, setter of builder.rackManager is 130 // provided as @InterfaceAudience.Private and it is commented that this is just 131 // to be used by unit test. 132 rackManager = builder.rackManager == null ? new RackManager(conf) : builder.rackManager; 133 } 134 135 private RegionMover() { 136 } 137 138 @Override 139 public void close() { 140 IOUtils.closeQuietly(this.admin, e -> LOG.warn("failed to close admin", e)); 141 IOUtils.closeQuietly(this.conn, e -> LOG.warn("failed to close conn", e)); 142 } 143 144 /** 145 * Builder for Region mover. Use the {@link #build()} method to create RegionMover object. Has 146 * {@link #filename(String)}, {@link #excludeFile(String)}, {@link #maxthreads(int)}, 147 * {@link #ack(boolean)}, {@link #timeout(int)}, {@link #designatedFile(String)} methods to set 148 * the corresponding options. 149 */ 150 public static class RegionMoverBuilder { 151 private boolean ack = true; 152 private int maxthreads = 1; 153 private int timeout = Integer.MAX_VALUE; 154 private List<String> isolateRegionIdArray = new ArrayList<>(); 155 private String hostname; 156 private String filename; 157 private String excludeFile = null; 158 private String designatedFile = null; 159 private String defaultDir = System.getProperty("java.io.tmpdir"); 160 @InterfaceAudience.Private 161 final int port; 162 private final Configuration conf; 163 private RackManager rackManager; 164 165 public RegionMoverBuilder(String hostname) { 166 this(hostname, createConf()); 167 } 168 169 /** 170 * Creates a new configuration and sets region mover specific overrides 171 */ 172 private static Configuration createConf() { 173 Configuration conf = HBaseConfiguration.create(); 174 conf.setInt("hbase.client.prefetch.limit", 1); 175 conf.setInt("hbase.client.pause", 500); 176 conf.setInt("hbase.client.retries.number", 100); 177 return conf; 178 } 179 180 /** 181 * @param hostname Hostname to unload regions from or load regions to. Can be either hostname or 182 * hostname:port. 183 * @param conf Configuration object 184 */ 185 public RegionMoverBuilder(String hostname, Configuration conf) { 186 String[] splitHostname = hostname.toLowerCase().split(":"); 187 this.hostname = splitHostname[0]; 188 if (splitHostname.length == 2) { 189 this.port = Integer.parseInt(splitHostname[1]); 190 } else { 191 this.port = conf.getInt(HConstants.REGIONSERVER_PORT, HConstants.DEFAULT_REGIONSERVER_PORT); 192 } 193 this.filename = defaultDir + File.separator + System.getProperty("user.name") + this.hostname 194 + ":" + Integer.toString(this.port); 195 this.conf = conf; 196 } 197 198 /** 199 * Path of file where regions will be written to during unloading/read from during loading 200 * @return RegionMoverBuilder object 201 */ 202 public RegionMoverBuilder filename(String filename) { 203 this.filename = filename; 204 return this; 205 } 206 207 /** 208 * Set the max number of threads that will be used to move regions 209 */ 210 public RegionMoverBuilder maxthreads(int threads) { 211 this.maxthreads = threads; 212 return this; 213 } 214 215 /** 216 * Set the region ID to isolate on the region server. 217 */ 218 public RegionMoverBuilder isolateRegionIdArray(List<String> isolateRegionIdArray) { 219 this.isolateRegionIdArray = isolateRegionIdArray; 220 return this; 221 } 222 223 /** 224 * Path of file containing hostnames to be excluded during region movement. Exclude file should 225 * have 'host:port' per line. Port is mandatory here as we can have many RS running on a single 226 * host. 227 */ 228 public RegionMoverBuilder excludeFile(String excludefile) { 229 this.excludeFile = excludefile; 230 return this; 231 } 232 233 /** 234 * Set the designated file. Designated file contains hostnames where region moves. Designated 235 * file should have 'host:port' per line. Port is mandatory here as we can have many RS running 236 * on a single host. 237 * @param designatedFile The designated file 238 * @return RegionMoverBuilder object 239 */ 240 public RegionMoverBuilder designatedFile(String designatedFile) { 241 this.designatedFile = designatedFile; 242 return this; 243 } 244 245 /** 246 * Set ack/noAck mode. 247 * <p> 248 * In ack mode regions are acknowledged before and after moving and the move is retried 249 * hbase.move.retries.max times, if unsuccessful we quit with exit code 1.No Ack mode is a best 250 * effort mode,each region movement is tried once.This can be used during graceful shutdown as 251 * even if we have a stuck region,upon shutdown it'll be reassigned anyway. 252 * <p> 253 * @return RegionMoverBuilder object 254 */ 255 public RegionMoverBuilder ack(boolean ack) { 256 this.ack = ack; 257 return this; 258 } 259 260 /** 261 * Set the timeout for Load/Unload operation in seconds.This is a global timeout,threadpool for 262 * movers also have a separate time which is hbase.move.wait.max * number of regions to 263 * load/unload 264 * @param timeout in seconds 265 * @return RegionMoverBuilder object 266 */ 267 public RegionMoverBuilder timeout(int timeout) { 268 this.timeout = timeout; 269 return this; 270 } 271 272 /** 273 * Set specific rackManager implementation. This setter method is for testing purpose only. 274 * @param rackManager rackManager impl 275 * @return RegionMoverBuilder object 276 */ 277 @InterfaceAudience.Private 278 public RegionMoverBuilder rackManager(RackManager rackManager) { 279 this.rackManager = rackManager; 280 return this; 281 } 282 283 /** 284 * This method builds the appropriate RegionMover object which can then be used to load/unload 285 * using load and unload methods 286 * @return RegionMover object 287 */ 288 public RegionMover build() throws IOException { 289 return new RegionMover(this); 290 } 291 } 292 293 /** 294 * Loads the specified {@link #hostname} with regions listed in the {@link #filename} RegionMover 295 * Object has to be created using {@link #RegionMover(RegionMoverBuilder)} 296 * @return true if loading succeeded, false otherwise 297 */ 298 public boolean load() throws ExecutionException, InterruptedException, TimeoutException { 299 ExecutorService loadPool = Executors.newFixedThreadPool(1); 300 Future<Boolean> loadTask = loadPool.submit(getMetaRegionMovePlan()); 301 boolean isMetaMoved = waitTaskToFinish(loadPool, loadTask, "loading"); 302 if (!isMetaMoved) { 303 return false; 304 } 305 loadPool = Executors.newFixedThreadPool(1); 306 loadTask = loadPool.submit(getNonMetaRegionsMovePlan()); 307 return waitTaskToFinish(loadPool, loadTask, "loading"); 308 } 309 310 private Callable<Boolean> getMetaRegionMovePlan() { 311 return getRegionsMovePlan(true); 312 } 313 314 private Callable<Boolean> getNonMetaRegionsMovePlan() { 315 return getRegionsMovePlan(false); 316 } 317 318 private Callable<Boolean> getRegionsMovePlan(boolean moveMetaRegion) { 319 return () -> { 320 try { 321 List<RegionInfo> regionsToMove = readRegionsFromFile(filename); 322 if (regionsToMove.isEmpty()) { 323 LOG.info("No regions to load.Exiting"); 324 return true; 325 } 326 Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove); 327 if (moveMetaRegion) { 328 if (metaRegion.isPresent()) { 329 loadRegions(Collections.singletonList(metaRegion.get())); 330 } 331 } else { 332 metaRegion.ifPresent(regionsToMove::remove); 333 loadRegions(regionsToMove); 334 } 335 } catch (Exception e) { 336 LOG.error("Error while loading regions to " + hostname, e); 337 return false; 338 } 339 return true; 340 }; 341 } 342 343 private Optional<RegionInfo> getMetaRegionInfoIfToBeMoved(List<RegionInfo> regionsToMove) { 344 return regionsToMove.stream().filter(RegionInfo::isMetaRegion).findFirst(); 345 } 346 347 private void loadRegions(List<RegionInfo> regionsToMove) throws Exception { 348 ServerName server = getTargetServer(); 349 List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>()); 350 LOG.info("Moving " + regionsToMove.size() + " regions to " + server + " using " 351 + this.maxthreads + " threads.Ack mode:" + this.ack); 352 353 final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads); 354 List<Future<Boolean>> taskList = new ArrayList<>(); 355 int counter = 0; 356 while (counter < regionsToMove.size()) { 357 RegionInfo region = regionsToMove.get(counter); 358 ServerName currentServer = MoveWithAck.getServerNameForRegion(region, admin, conn); 359 if (currentServer == null) { 360 LOG 361 .warn("Could not get server for Region:" + region.getRegionNameAsString() + " moving on"); 362 counter++; 363 continue; 364 } else if (server.equals(currentServer)) { 365 LOG.info( 366 "Region " + region.getRegionNameAsString() + " is already on target server=" + server); 367 counter++; 368 continue; 369 } 370 if (ack) { 371 Future<Boolean> task = moveRegionsPool 372 .submit(new MoveWithAck(conn, region, currentServer, server, movedRegions)); 373 taskList.add(task); 374 } else { 375 Future<Boolean> task = moveRegionsPool 376 .submit(new MoveWithoutAck(admin, region, currentServer, server, movedRegions)); 377 taskList.add(task); 378 } 379 counter++; 380 } 381 382 moveRegionsPool.shutdown(); 383 long timeoutInSeconds = regionsToMove.size() 384 * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX); 385 waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds); 386 } 387 388 /** 389 * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In 390 * noAck mode we do not make sure that region is successfully online on the target region 391 * server,hence it is best effort.We do not unload regions to hostnames given in 392 * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions 393 * to hostnames provided in {@link #designatedFile} 394 * @return true if unloading succeeded, false otherwise 395 */ 396 public boolean unload() throws InterruptedException, ExecutionException, TimeoutException { 397 return unloadRegions(false); 398 } 399 400 /** 401 * Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}.In 402 * noAck mode we do not make sure that region is successfully online on the target region 403 * server,hence it is best effort.We do not unload regions to hostnames given in 404 * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions 405 * to hostnames provided in {@link #designatedFile}. While unloading regions, destination 406 * RegionServers are selected from different rack i.e regions should not move to any RegionServers 407 * that belong to same rack as source RegionServer. 408 * @return true if unloading succeeded, false otherwise 409 */ 410 public boolean unloadFromRack() 411 throws InterruptedException, ExecutionException, TimeoutException { 412 return unloadRegions(true); 413 } 414 415 private boolean unloadRegions(boolean unloadFromRack) 416 throws ExecutionException, InterruptedException, TimeoutException { 417 return unloadRegions(unloadFromRack, null); 418 } 419 420 /** 421 * Isolated regions specified in {@link #isolateRegionIdArray} on {@link #hostname} in ack Mode 422 * and Unload regions from given {@link #hostname} using ack/noAck mode and {@link #maxthreads}. 423 * In noAck mode we do not make sure that region is successfully online on the target region 424 * server,hence it is the best effort. We do not unload regions to hostnames given in 425 * {@link #excludeFile}. If designatedFile is present with some contents, we will unload regions 426 * to hostnames provided in {@link #designatedFile} 427 * @return true if region isolation succeeded, false otherwise 428 */ 429 public boolean isolateRegions() 430 throws ExecutionException, InterruptedException, TimeoutException { 431 return unloadRegions(false, isolateRegionIdArray); 432 } 433 434 private boolean unloadRegions(boolean unloadFromRack, List<String> isolateRegionIdArray) 435 throws InterruptedException, ExecutionException, TimeoutException { 436 deleteFile(this.filename); 437 ExecutorService unloadPool = Executors.newFixedThreadPool(1); 438 Future<Boolean> unloadTask = unloadPool.submit(() -> { 439 List<RegionInfo> movedRegions = Collections.synchronizedList(new ArrayList<>()); 440 try { 441 // Get Online RegionServers 442 List<ServerName> regionServers = new ArrayList<>(); 443 regionServers.addAll(admin.getRegionServers()); 444 // Remove the host Region server from target Region Servers list 445 ServerName server = stripServer(regionServers, hostname, port); 446 if (server == null) { 447 LOG.info("Could not find server '{}:{}' in the set of region servers. giving up.", 448 hostname, port); 449 LOG.debug("List of region servers: {}", regionServers); 450 return false; 451 } 452 // Remove RS not present in the designated file 453 includeExcludeRegionServers(designatedFile, regionServers, true); 454 455 // Remove RS present in the exclude file 456 includeExcludeRegionServers(excludeFile, regionServers, false); 457 458 if (unloadFromRack) { 459 // remove regionServers that belong to same rack (as source host) since the goal is to 460 // unload regions from source regionServer to destination regionServers 461 // that belong to different rack only. 462 String sourceRack = rackManager.getRack(server); 463 List<String> racks = rackManager.getRack(regionServers); 464 Iterator<ServerName> iterator = regionServers.iterator(); 465 int i = 0; 466 while (iterator.hasNext()) { 467 iterator.next(); 468 if (racks.size() > i && racks.get(i) != null && racks.get(i).equals(sourceRack)) { 469 iterator.remove(); 470 } 471 i++; 472 } 473 } 474 475 // Remove decommissioned RS 476 Set<ServerName> decommissionedRS = new HashSet<>(admin.listDecommissionedRegionServers()); 477 if (CollectionUtils.isNotEmpty(decommissionedRS)) { 478 regionServers.removeIf(decommissionedRS::contains); 479 LOG.debug("Excluded RegionServers from unloading regions to because they " 480 + "are marked as decommissioned. Servers: {}", decommissionedRS); 481 } 482 483 stripMaster(regionServers); 484 if (regionServers.isEmpty()) { 485 LOG.warn("No Regions were moved - no servers available"); 486 return false; 487 } 488 unloadRegions(server, regionServers, movedRegions, isolateRegionIdArray); 489 } catch (Exception e) { 490 LOG.error("Error while unloading regions ", e); 491 return false; 492 } finally { 493 if (movedRegions != null) { 494 writeFile(filename, movedRegions); 495 } 496 } 497 return true; 498 }); 499 return waitTaskToFinish(unloadPool, unloadTask, "unloading"); 500 } 501 502 private void unloadRegions(ServerName server, List<ServerName> regionServers, 503 List<RegionInfo> movedRegions, List<String> isolateRegionIdArray) throws Exception { 504 while (true) { 505 List<RegionInfo> isolateRegionInfoList = Collections.synchronizedList(new ArrayList<>()); 506 RegionInfo isolateRegionInfo = null; 507 if (isolateRegionIdArray != null && !isolateRegionIdArray.isEmpty()) { 508 // Region will be moved to target region server with Ack mode. 509 final ExecutorService isolateRegionPool = Executors.newFixedThreadPool(maxthreads); 510 List<Future<Boolean>> isolateRegionTaskList = new ArrayList<>(); 511 List<RegionInfo> recentlyIsolatedRegion = Collections.synchronizedList(new ArrayList<>()); 512 boolean allRegionOpsSuccessful = true; 513 boolean isMetaIsolated = false; 514 RegionInfo metaRegionInfo = RegionInfoBuilder.FIRST_META_REGIONINFO; 515 List<HRegionLocation> hRegionLocationRegionIsolation = 516 Collections.synchronizedList(new ArrayList<>()); 517 for (String isolateRegionId : isolateRegionIdArray) { 518 if (isolateRegionId.equalsIgnoreCase(metaRegionInfo.getEncodedName())) { 519 isMetaIsolated = true; 520 continue; 521 } 522 Result result = MetaTableAccessor.scanByRegionEncodedName(conn, isolateRegionId); 523 HRegionLocation hRegionLocation = 524 MetaTableAccessor.getRegionLocation(conn, result.getRow()); 525 if (hRegionLocation != null) { 526 hRegionLocationRegionIsolation.add(hRegionLocation); 527 } else { 528 LOG.error("Region " + isolateRegionId + " doesn't exists/can't fetch from" 529 + " meta...Quitting now"); 530 // We only move the regions if all the regions were found. 531 allRegionOpsSuccessful = false; 532 break; 533 } 534 } 535 536 if (!allRegionOpsSuccessful) { 537 break; 538 } 539 // If hbase:meta region was isolated, then it needs to be part of isolateRegionInfoList. 540 if (isMetaIsolated) { 541 ZKWatcher zkWatcher = new ZKWatcher(conf, null, null); 542 List<HRegionLocation> result = new ArrayList<>(); 543 for (String znode : zkWatcher.getMetaReplicaNodes()) { 544 String path = ZNodePaths.joinZNode(zkWatcher.getZNodePaths().baseZNode, znode); 545 int replicaId = zkWatcher.getZNodePaths().getMetaReplicaIdFromPath(path); 546 RegionState state = MetaTableLocator.getMetaRegionState(zkWatcher, replicaId); 547 result.add(new HRegionLocation(state.getRegion(), state.getServerName())); 548 } 549 ServerName metaSeverName = result.get(0).getServerName(); 550 // For isolating hbase:meta, it should move explicitly in Ack mode, 551 // hence the forceMoveRegionByAck = true. 552 if (!metaSeverName.equals(server)) { 553 LOG.info("Region of hbase:meta " + metaRegionInfo.getEncodedName() + " is on server " 554 + metaSeverName + " moving to " + server); 555 submitRegionMovesWhileUnloading(metaSeverName, Collections.singletonList(server), 556 movedRegions, Collections.singletonList(metaRegionInfo), true); 557 } else { 558 LOG.info("Region of hbase:meta " + metaRegionInfo.getEncodedName() + " already exists" 559 + " on server : " + server); 560 } 561 isolateRegionInfoList.add(RegionInfoBuilder.FIRST_META_REGIONINFO); 562 } 563 564 if (!hRegionLocationRegionIsolation.isEmpty()) { 565 for (HRegionLocation hRegionLocation : hRegionLocationRegionIsolation) { 566 isolateRegionInfo = hRegionLocation.getRegion(); 567 isolateRegionInfoList.add(isolateRegionInfo); 568 if (hRegionLocation.getServerName() == server) { 569 LOG.info("Region " + hRegionLocation.getRegion().getEncodedName() + " already exists" 570 + " on server : " + server.getHostname()); 571 } else { 572 Future<Boolean> isolateRegionTask = 573 isolateRegionPool.submit(new MoveWithAck(conn, isolateRegionInfo, 574 hRegionLocation.getServerName(), server, recentlyIsolatedRegion)); 575 isolateRegionTaskList.add(isolateRegionTask); 576 } 577 } 578 } 579 580 if (!isolateRegionTaskList.isEmpty()) { 581 isolateRegionPool.shutdown(); 582 // Now that we have fetched all the region's regionInfo, we can move them. 583 waitMoveTasksToFinish(isolateRegionPool, isolateRegionTaskList, 584 admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX)); 585 586 Set<RegionInfo> currentRegionsOnTheServer = new HashSet<>(admin.getRegions(server)); 587 if (!currentRegionsOnTheServer.containsAll(isolateRegionInfoList)) { 588 // If all the regions are not online on the target server, 589 // we don't put RS in decommission mode and exit from here. 590 LOG.error("One of the Region move failed OR stuck in transition...Quitting now"); 591 break; 592 } 593 } else { 594 LOG.info("All regions already exists on server : " + server.getHostname()); 595 } 596 // Once region has been moved to target RS, put the target RS into decommission mode, 597 // so master doesn't assign new region to the target RS while we unload the target RS. 598 // Also pass 'offload' flag as false since we don't want master to offload the target RS. 599 List<ServerName> listOfServer = new ArrayList<>(); 600 listOfServer.add(server); 601 LOG.info("Putting server : " + server.getHostname() + " in decommission/draining mode"); 602 admin.decommissionRegionServers(listOfServer, false); 603 } 604 List<RegionInfo> regionsToMove = admin.getRegions(server); 605 // Remove all the regions from the online Region list, that we just isolated. 606 // This will also include hbase:meta if it was isolated. 607 regionsToMove.removeAll(isolateRegionInfoList); 608 regionsToMove.removeAll(movedRegions); 609 if (regionsToMove.isEmpty()) { 610 LOG.info("No Regions to move....Quitting now"); 611 break; 612 } 613 LOG.info("Moving {} regions from {} to {} servers using {} threads .Ack Mode: {}", 614 regionsToMove.size(), this.hostname, regionServers.size(), this.maxthreads, ack); 615 616 Optional<RegionInfo> metaRegion = getMetaRegionInfoIfToBeMoved(regionsToMove); 617 if (metaRegion.isPresent()) { 618 RegionInfo meta = metaRegion.get(); 619 // hbase:meta should move explicitly in Ack mode. 620 submitRegionMovesWhileUnloading(server, regionServers, movedRegions, 621 Collections.singletonList(meta), true); 622 regionsToMove.remove(meta); 623 } 624 submitRegionMovesWhileUnloading(server, regionServers, movedRegions, regionsToMove, false); 625 } 626 } 627 628 private void submitRegionMovesWhileUnloading(ServerName server, List<ServerName> regionServers, 629 List<RegionInfo> movedRegions, List<RegionInfo> regionsToMove, boolean forceMoveRegionByAck) 630 throws Exception { 631 final ExecutorService moveRegionsPool = Executors.newFixedThreadPool(this.maxthreads); 632 List<Future<Boolean>> taskList = new ArrayList<>(); 633 int serverIndex = 0; 634 for (RegionInfo regionToMove : regionsToMove) { 635 // To move/isolate hbase:meta on a server, it should happen explicitly by Ack mode, hence the 636 // forceMoveRegionByAck = true. 637 if (ack || forceMoveRegionByAck) { 638 Future<Boolean> task = moveRegionsPool.submit(new MoveWithAck(conn, regionToMove, server, 639 regionServers.get(serverIndex), movedRegions)); 640 taskList.add(task); 641 } else { 642 Future<Boolean> task = moveRegionsPool.submit(new MoveWithoutAck(admin, regionToMove, 643 server, regionServers.get(serverIndex), movedRegions)); 644 taskList.add(task); 645 } 646 serverIndex = (serverIndex + 1) % regionServers.size(); 647 } 648 moveRegionsPool.shutdown(); 649 long timeoutInSeconds = regionsToMove.size() 650 * admin.getConfiguration().getLong(MOVE_WAIT_MAX_KEY, DEFAULT_MOVE_WAIT_MAX); 651 waitMoveTasksToFinish(moveRegionsPool, taskList, timeoutInSeconds); 652 } 653 654 private boolean waitTaskToFinish(ExecutorService pool, Future<Boolean> task, String operation) 655 throws TimeoutException, InterruptedException, ExecutionException { 656 pool.shutdown(); 657 try { 658 if (!pool.awaitTermination((long) this.timeout, TimeUnit.SECONDS)) { 659 LOG.warn("Timed out before finishing the " + operation + " operation. Timeout: " 660 + this.timeout + "sec"); 661 pool.shutdownNow(); 662 } 663 } catch (InterruptedException e) { 664 pool.shutdownNow(); 665 Thread.currentThread().interrupt(); 666 } 667 try { 668 return task.get(5, TimeUnit.SECONDS); 669 } catch (InterruptedException e) { 670 LOG.warn("Interrupted while " + operation + " Regions on " + this.hostname, e); 671 throw e; 672 } catch (ExecutionException e) { 673 LOG.error("Error while " + operation + " regions on RegionServer " + this.hostname, e); 674 throw e; 675 } 676 } 677 678 private void waitMoveTasksToFinish(ExecutorService moveRegionsPool, 679 List<Future<Boolean>> taskList, long timeoutInSeconds) throws Exception { 680 try { 681 if (!moveRegionsPool.awaitTermination(timeoutInSeconds, TimeUnit.SECONDS)) { 682 moveRegionsPool.shutdownNow(); 683 } 684 } catch (InterruptedException e) { 685 moveRegionsPool.shutdownNow(); 686 Thread.currentThread().interrupt(); 687 } 688 for (Future<Boolean> future : taskList) { 689 try { 690 // if even after shutdownNow threads are stuck we wait for 5 secs max 691 if (!future.get(5, TimeUnit.SECONDS)) { 692 LOG.error("Was Not able to move region....Exiting Now"); 693 throw new Exception("Could not move region Exception"); 694 } 695 } catch (InterruptedException e) { 696 LOG.error("Interrupted while waiting for Thread to Complete " + e.getMessage(), e); 697 throw e; 698 } catch (ExecutionException e) { 699 boolean ignoreFailure = ignoreRegionMoveFailure(e); 700 if (ignoreFailure) { 701 LOG.debug("Ignore region move failure, it might have been split/merged.", e); 702 } else { 703 LOG.error("Got Exception From Thread While moving region {}", e.getMessage(), e); 704 throw e; 705 } 706 } catch (CancellationException e) { 707 LOG.error("Thread for moving region cancelled. Timeout for cancellation:" + timeoutInSeconds 708 + "secs", e); 709 throw e; 710 } 711 } 712 } 713 714 private boolean ignoreRegionMoveFailure(ExecutionException e) { 715 boolean ignoreFailure = false; 716 if (e.getCause() instanceof UnknownRegionException) { 717 // region does not exist anymore 718 ignoreFailure = true; 719 } else if ( 720 e.getCause() instanceof DoNotRetryRegionException && e.getCause().getMessage() != null 721 && e.getCause().getMessage() 722 .contains(AssignmentManager.UNEXPECTED_STATE_REGION + "state=SPLIT,") 723 ) { 724 // region is recently split 725 ignoreFailure = true; 726 } 727 return ignoreFailure; 728 } 729 730 private ServerName getTargetServer() throws Exception { 731 ServerName server = null; 732 int maxWaitInSeconds = 733 admin.getConfiguration().getInt(SERVERSTART_WAIT_MAX_KEY, DEFAULT_SERVERSTART_WAIT_MAX); 734 long maxWait = EnvironmentEdgeManager.currentTime() + maxWaitInSeconds * 1000; 735 while (EnvironmentEdgeManager.currentTime() < maxWait) { 736 try { 737 List<ServerName> regionServers = new ArrayList<>(); 738 regionServers.addAll(admin.getRegionServers()); 739 // Remove the host Region server from target Region Servers list 740 server = stripServer(regionServers, hostname, port); 741 if (server != null) { 742 break; 743 } else { 744 LOG.warn("Server " + hostname + ":" + port + " is not up yet, waiting"); 745 } 746 } catch (IOException e) { 747 LOG.warn("Could not get list of region servers", e); 748 } 749 Thread.sleep(500); 750 } 751 if (server == null) { 752 LOG.error("Server " + hostname + ":" + port + " is not up. Giving up."); 753 throw new Exception("Server " + hostname + ":" + port + " to load regions not online"); 754 } 755 return server; 756 } 757 758 private List<RegionInfo> readRegionsFromFile(String filename) throws IOException { 759 List<RegionInfo> regions = new ArrayList<>(); 760 File f = new File(filename); 761 if (!f.exists()) { 762 return regions; 763 } 764 try ( 765 DataInputStream dis = new DataInputStream(new BufferedInputStream(new FileInputStream(f)))) { 766 int numRegions = dis.readInt(); 767 int index = 0; 768 while (index < numRegions) { 769 regions.add(RegionInfo.parseFromOrNull(Bytes.readByteArray(dis))); 770 index++; 771 } 772 } catch (IOException e) { 773 LOG.error("Error while reading regions from file:" + filename, e); 774 throw e; 775 } 776 return regions; 777 } 778 779 /** 780 * Write the number of regions moved in the first line followed by regions moved in subsequent 781 * lines 782 */ 783 private void writeFile(String filename, List<RegionInfo> movedRegions) throws IOException { 784 try (DataOutputStream dos = 785 new DataOutputStream(new BufferedOutputStream(new FileOutputStream(filename)))) { 786 dos.writeInt(movedRegions.size()); 787 for (RegionInfo region : movedRegions) { 788 Bytes.writeByteArray(dos, RegionInfo.toByteArray(region)); 789 } 790 } catch (IOException e) { 791 LOG.error("ERROR: Was Not able to write regions moved to output file but moved " 792 + movedRegions.size() + " regions", e); 793 throw e; 794 } 795 } 796 797 private void deleteFile(String filename) { 798 File f = new File(filename); 799 if (f.exists()) { 800 f.delete(); 801 } 802 } 803 804 /** 805 * @param filename The file should have 'host:port' per line 806 * @return List of servers from the file in format 'hostname:port'. 807 */ 808 private List<String> readServersFromFile(String filename) throws IOException { 809 List<String> servers = new ArrayList<>(); 810 if (filename != null) { 811 try { 812 Files.readAllLines(Paths.get(filename)).stream().map(String::trim) 813 .filter(((Predicate<String>) String::isEmpty).negate()).map(String::toLowerCase) 814 .forEach(servers::add); 815 } catch (IOException e) { 816 LOG.error("Exception while reading servers from file,", e); 817 throw e; 818 } 819 } 820 return servers; 821 } 822 823 /** 824 * Designates or excludes the servername whose hostname and port portion matches the list given in 825 * the file. Example:<br> 826 * If you want to designated RSs, suppose designatedFile has RS1, regionServers has RS1, RS2 and 827 * RS3. When we call includeExcludeRegionServers(designatedFile, regionServers, true), RS2 and RS3 828 * are removed from regionServers list so that regions can move to only RS1. If you want to 829 * exclude RSs, suppose excludeFile has RS1, regionServers has RS1, RS2 and RS3. When we call 830 * includeExcludeRegionServers(excludeFile, servers, false), RS1 is removed from regionServers 831 * list so that regions can move to only RS2 and RS3. 832 */ 833 private void includeExcludeRegionServers(String fileName, List<ServerName> regionServers, 834 boolean isInclude) throws IOException { 835 if (fileName != null) { 836 List<String> servers = readServersFromFile(fileName); 837 if (servers.isEmpty()) { 838 LOG.warn("No servers provided in the file: {}." + fileName); 839 return; 840 } 841 Iterator<ServerName> i = regionServers.iterator(); 842 while (i.hasNext()) { 843 String rs = i.next().getServerName(); 844 String rsPort = rs.split(ServerName.SERVERNAME_SEPARATOR)[0].toLowerCase() + ":" 845 + rs.split(ServerName.SERVERNAME_SEPARATOR)[1]; 846 if (isInclude != servers.contains(rsPort)) { 847 i.remove(); 848 } 849 } 850 } 851 } 852 853 /** 854 * Exclude master from list of RSs to move regions to 855 */ 856 private void stripMaster(List<ServerName> regionServers) throws IOException { 857 ServerName master = admin.getClusterMetrics(EnumSet.of(Option.MASTER)).getMasterName(); 858 stripServer(regionServers, master.getHostname(), master.getPort()); 859 } 860 861 /** 862 * Remove the servername whose hostname and port portion matches from the passed array of servers. 863 * Returns as side-effect the servername removed. 864 * @return server removed from list of Region Servers 865 */ 866 private ServerName stripServer(List<ServerName> regionServers, String hostname, int port) { 867 for (Iterator<ServerName> iter = regionServers.iterator(); iter.hasNext();) { 868 ServerName server = iter.next(); 869 if ( 870 server.getAddress().getHostName().equalsIgnoreCase(hostname) 871 && server.getAddress().getPort() == port 872 ) { 873 iter.remove(); 874 return server; 875 } 876 } 877 return null; 878 } 879 880 @Override 881 protected void addOptions() { 882 this.addRequiredOptWithArg("r", "regionserverhost", "region server <hostname>|<hostname:port>"); 883 this.addRequiredOptWithArg("o", "operation", 884 "Expected: load/unload/unload_from_rack/isolate_regions"); 885 this.addOptWithArg("m", "maxthreads", 886 "Define the maximum number of threads to use to unload and reload the regions"); 887 this.addOptWithArg("i", "isolateRegionIds", 888 "Comma separated list of Region IDs hash to isolate on a RegionServer and put region server" 889 + " in draining mode. This option should only be used with '-o isolate_regions'." 890 + " By putting region server in decommission/draining mode, master can't assign any" 891 + " new region on this server. If one or more regions are not found OR failed to isolate" 892 + " successfully, utility will exist without putting RS in draining/decommission mode." 893 + " Ex. --isolateRegionIds id1,id2,id3 OR -i id1,id2,id3"); 894 this.addOptWithArg("x", "excludefile", 895 "File with <hostname:port> per line to exclude as unload targets; default excludes only " 896 + "target host; useful for rack decommisioning."); 897 this.addOptWithArg("d", "designatedfile", 898 "File with <hostname:port> per line as unload targets;" + "default is all online hosts"); 899 this.addOptWithArg("f", "filename", 900 "File to save regions list into unloading, or read from loading; " 901 + "default /tmp/<usernamehostname:port>"); 902 this.addOptNoArg("n", "noack", 903 "Turn on No-Ack mode(default: false) which won't check if region is online on target " 904 + "RegionServer, hence best effort. This is more performant in unloading and loading " 905 + "but might lead to region being unavailable for some time till master reassigns it " 906 + "in case the move failed"); 907 this.addOptWithArg("t", "timeout", "timeout in seconds after which the tool will exit " 908 + "irrespective of whether it finished or not;default Integer.MAX_VALUE"); 909 } 910 911 @Override 912 protected void processOptions(CommandLine cmd) { 913 String hostname = cmd.getOptionValue("r"); 914 rmbuilder = new RegionMoverBuilder(hostname); 915 this.loadUnload = cmd.getOptionValue("o").toLowerCase(Locale.ROOT); 916 if (cmd.hasOption('m')) { 917 rmbuilder.maxthreads(Integer.parseInt(cmd.getOptionValue('m'))); 918 } 919 if (this.loadUnload.equals("isolate_regions") && cmd.hasOption("isolateRegionIds")) { 920 rmbuilder 921 .isolateRegionIdArray(Arrays.asList(cmd.getOptionValue("isolateRegionIds").split(","))); 922 } 923 if (cmd.hasOption('n')) { 924 rmbuilder.ack(false); 925 } 926 if (cmd.hasOption('f')) { 927 rmbuilder.filename(cmd.getOptionValue('f')); 928 } 929 if (cmd.hasOption('x')) { 930 rmbuilder.excludeFile(cmd.getOptionValue('x')); 931 } 932 if (cmd.hasOption('d')) { 933 rmbuilder.designatedFile(cmd.getOptionValue('d')); 934 } 935 if (cmd.hasOption('t')) { 936 rmbuilder.timeout(Integer.parseInt(cmd.getOptionValue('t'))); 937 } 938 this.loadUnload = cmd.getOptionValue("o").toLowerCase(Locale.ROOT); 939 } 940 941 @Override 942 protected int doWork() throws Exception { 943 boolean success; 944 try (RegionMover rm = rmbuilder.build()) { 945 if (loadUnload.equalsIgnoreCase("load")) { 946 success = rm.load(); 947 } else if (loadUnload.equalsIgnoreCase("unload")) { 948 success = rm.unload(); 949 } else if (loadUnload.equalsIgnoreCase("unload_from_rack")) { 950 success = rm.unloadFromRack(); 951 } else if (loadUnload.equalsIgnoreCase("isolate_regions")) { 952 if (rm.isolateRegionIdArray != null && !rm.isolateRegionIdArray.isEmpty()) { 953 success = rm.isolateRegions(); 954 } else { 955 LOG.error("Missing -i/--isolate_regions option with '-o isolate_regions' option"); 956 LOG.error("Use -h or --help for usage instructions"); 957 printUsage(); 958 success = false; 959 } 960 } else { 961 printUsage(); 962 success = false; 963 } 964 } 965 return (success ? 0 : 1); 966 } 967 968 public static void main(String[] args) { 969 try (RegionMover mover = new RegionMover()) { 970 mover.doStaticMain(args); 971 } 972 } 973}