/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
package org.apache.hadoop.hbase;

import java.io.Closeable;
import java.io.IOException;
import org.apache.hadoop.conf.Configurable;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.client.RegionInfoBuilder;
import org.apache.hadoop.hbase.util.EnvironmentEdgeManager;
import org.apache.hadoop.hbase.util.Threads;
import org.apache.yetus.audience.InterfaceAudience;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

import org.apache.hadoop.hbase.shaded.protobuf.generated.AdminProtos.AdminService;
import org.apache.hadoop.hbase.shaded.protobuf.generated.ClientProtos.ClientService;
import org.apache.hadoop.hbase.shaded.protobuf.generated.MasterProtos.MasterService;

/**
 * This class defines methods that can help with managing HBase clusters from unit tests and system
 * tests. There are 3 types of cluster deployments:
 * <ul>
 * <li><b>MiniHBaseCluster:</b> each server is run in the same JVM in separate threads, used by unit
 * tests</li>
 * <li><b>DistributedHBaseCluster:</b> the cluster is pre-deployed, system and integration tests can
 * interact with the cluster.</li>
 * <li><b>ProcessBasedLocalHBaseCluster:</b> each server is deployed locally but in separate JVMs.
 * </li>
 * </ul>
 * <p>
 * HBaseCluster unifies the way tests interact with the cluster, so that the same test can be run
 * against a mini-cluster during unit test execution, or a distributed cluster having tens/hundreds
 * of nodes during execution of integration tests.
 * <p>
 * HBaseCluster exposes client-side public interfaces to tests, so that tests do not assume they are
 * running in a particular mode. Not all the tests are suitable to be run on an actual cluster, and
 * some tests will still need to mock stuff and introspect internal state. For those use cases from
 * unit tests, or if more control is needed, you can use the subclasses directly. In that sense,
 * this class does not abstract away <strong>every</strong> interface that MiniHBaseCluster or
 * DistributedHBaseCluster provide.
 */
@InterfaceAudience.Public
public abstract class HBaseCluster implements Closeable, Configurable {
  // Log is being used in DistributedHBaseCluster class, hence keeping it as package scope
  static final Logger LOG = LoggerFactory.getLogger(HBaseCluster.class);

  /** The configuration this cluster was created with, or the one last passed to setConf. */
  protected Configuration conf;

  /**
   * The status of the cluster before we begin. This class never assigns the field itself, so it is
   * {@code null} unless a subclass populates it.
   */
  protected ClusterMetrics initialClusterStatus;

  /**
   * Construct an HBaseCluster. The configuration is installed through
   * {@link #setConf(Configuration)}.
   * @param conf Configuration to be used for cluster
   */
  public HBaseCluster(Configuration conf) {
    setConf(conf);
  }

  /**
   * Replaces the configuration used by this cluster.
   * @param conf the configuration to store; it is kept by reference, not copied
   */
  @Override
  public void setConf(Configuration conf) {
    this.conf = conf;
  }

  /**
   * Returns the configuration currently held by this cluster.
   */
  @Override
  public Configuration getConf() {
    return conf;
  }

  /**
   * Returns a ClusterMetrics for this HBase cluster.
   * @see #getInitialClusterMetrics()
   */
  public abstract ClusterMetrics getClusterMetrics() throws IOException;

  /**
   * Returns the ClusterMetrics for this HBase cluster as observed at the starting of the
   * HBaseCluster. This is whatever is stored in {@link #initialClusterStatus}, which may be
   * {@code null} if no subclass has set it.
   */
  public ClusterMetrics getInitialClusterMetrics() throws IOException {
    return initialClusterStatus;
  }

  /**
   * Returns an {@link MasterService.BlockingInterface} to the active master
   */
  public abstract MasterService.BlockingInterface getMasterAdminService() throws IOException;

  /**
   * Returns an AdminProtocol interface to the regionserver
   * @param serverName the region server to connect to
   */
  public abstract AdminService.BlockingInterface getAdminProtocol(ServerName serverName)
    throws IOException;

  /**
   * Returns a ClientProtocol interface to the regionserver
   * @param serverName the region server to connect to
   */
  public abstract ClientService.BlockingInterface getClientProtocol(ServerName serverName)
    throws IOException;

  /**
   * Starts a new region server on the given hostname or if this is a mini/local cluster, starts a
   * region server locally.
   * @param hostname the hostname to start the regionserver on
   * @param port     the port to start the regionserver on
   * @throws IOException if something goes wrong
   */
  public abstract void startRegionServer(String hostname, int port) throws IOException;

  /**
   * Kills the region server process if this is a distributed cluster, otherwise this causes the
   * region server to exit doing basic clean up only.
   * @param serverName the region server to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killRegionServer(ServerName serverName) throws IOException;

  /**
   * Keeping track of killed servers and being able to check if a particular server was killed makes
   * it possible to do fault tolerance testing for dead servers in a deterministic way. A concrete
   * example of such case is - killing servers and waiting for all regions of a particular table to
   * be assigned. We can check for server column in META table and that its value is not one of the
   * killed servers.
   * @param serverName the region server to check
   */
  public abstract boolean isKilledRS(ServerName serverName);

  /**
   * Stops the given region server, by attempting a gradual stop.
   * @param serverName the region server to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopRegionServer(ServerName serverName) throws IOException;

  /**
   * Wait for the specified region server to join the cluster.
   * <p>
   * Repeatedly fetches {@link #getClusterMetrics()} and returns as soon as one of the live servers
   * matches both {@code hostname} and {@code port}, pausing with {@code Threads.sleep(100)} between
   * attempts. The live servers are only inspected while the elapsed time is strictly below
   * {@code timeout}, so a non-positive timeout fails without checking.
   * @param hostname the hostname the region server is expected to report
   * @param port     the port the region server is expected to report
   * @param timeout  the maximum time to wait, in ms
   * @throws IOException if something goes wrong or timeout occurs
   */
  public void waitForRegionServerToStart(String hostname, int port, long timeout)
    throws IOException {
    long start = EnvironmentEdgeManager.currentTime();
    while ((EnvironmentEdgeManager.currentTime() - start) < timeout) {
      for (ServerName server : getClusterMetrics().getLiveServerMetrics().keySet()) {
        if (server.getHostname().equals(hostname) && server.getPort() == port) {
          return;
        }
      }
      Threads.sleep(100);
    }
    // Report the port as well: the match above is on host AND port, and several region servers
    // may share one hostname.
    throw new IOException("did timeout " + timeout + "ms waiting for region server to start: "
      + hostname + ":" + port);
  }

  /**
   * Wait for the specified region server to stop the thread / process.
   * @param serverName the region server to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForRegionServerToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Suspend the region server
   * @param serverName the region server to suspend
   * @throws IOException if something goes wrong
   */
  public abstract void suspendRegionServer(ServerName serverName) throws IOException;

  /**
   * Wait for the specified region server to suspend the thread / process.
   * @param serverName the region server to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForRegionServerToSuspend(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Resume the region server
   * @param serverName the region server to resume
   * @throws IOException if something goes wrong
   */
  public abstract void resumeRegionServer(ServerName serverName) throws IOException;

  /**
   * Wait for the specified region server to resume the thread / process.
   * @param serverName the region server to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForRegionServerToResume(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Starts a new zookeeper node on the given hostname or if this is a mini/local cluster, silently
   * logs warning message.
   * @param hostname the hostname to start the zookeeper node on
   * @param port     the port to start the zookeeper node on
   * @throws IOException if something goes wrong
   */
  public abstract void startZkNode(String hostname, int port) throws IOException;

  /**
   * Kills the zookeeper node process if this is a distributed cluster, otherwise, this causes
   * master to exit doing basic clean up only.
   * <p>
   * NOTE(review): the "causes master to exit" clause reads like it was copied from
   * {@link #killMaster(ServerName)}; confirm the mini/local behaviour against the implementations.
   * @param serverName the zookeeper node to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killZkNode(ServerName serverName) throws IOException;

  /**
   * Stops the zookeeper node if this is a distributed cluster, otherwise silently logs warning
   * message.
   * @param serverName the zookeeper node to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopZkNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified zookeeper node to join the cluster
   * @param serverName the zookeeper node to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForZkNodeToStart(ServerName serverName, long timeout) throws IOException;

  /**
   * Wait for the specified zookeeper node to stop the thread / process.
   * @param serverName the zookeeper node to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForZkNodeToStop(ServerName serverName, long timeout) throws IOException;

  /**
   * Starts a new datanode on the given hostname or if this is a mini/local cluster, silently logs
   * warning message.
   * @param serverName the datanode to start
   * @throws IOException if something goes wrong
   */
  public abstract void startDataNode(ServerName serverName) throws IOException;

  /**
   * Kills the datanode process if this is a distributed cluster, otherwise, this causes master to
   * exit doing basic clean up only.
   * <p>
   * NOTE(review): the "causes master to exit" clause reads like it was copied from
   * {@link #killMaster(ServerName)}; confirm the mini/local behaviour against the implementations.
   * @param serverName the datanode to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killDataNode(ServerName serverName) throws IOException;

  /**
   * Stops the datanode if this is a distributed cluster, otherwise silently logs warning message.
   * @param serverName the datanode to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopDataNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified datanode to join the cluster
   * @param serverName the datanode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForDataNodeToStart(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Wait for the specified datanode to stop the thread / process.
   * @param serverName the datanode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForDataNodeToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Starts a new namenode on the given hostname or if this is a mini/local cluster, silently logs
   * warning message.
   * @param serverName the namenode to start
   * @throws IOException if something goes wrong
   */
  public abstract void startNameNode(ServerName serverName) throws IOException;

  /**
   * Kills the namenode process if this is a distributed cluster, otherwise, this causes master to
   * exit doing basic clean up only.
   * <p>
   * NOTE(review): the "causes master to exit" clause reads like it was copied from
   * {@link #killMaster(ServerName)}; confirm the mini/local behaviour against the implementations.
   * @param serverName the namenode to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killNameNode(ServerName serverName) throws IOException;

  /**
   * Stops the namenode if this is a distributed cluster, otherwise silently logs warning message.
   * @param serverName the namenode to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopNameNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified namenode to join the cluster
   * @param serverName the namenode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForNameNodeToStart(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Wait for the specified namenode to stop
   * @param serverName the namenode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForNameNodeToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Starts a new journalnode on the given hostname or if this is a mini/local cluster, silently
   * logs warning message.
   * @param serverName the journalnode to start
   * @throws IOException if something goes wrong
   */
  public abstract void startJournalNode(ServerName serverName) throws IOException;

  /**
   * Kills the journalnode process if this is a distributed cluster, otherwise, this causes master
   * to exit doing basic clean up only.
   * <p>
   * NOTE(review): the "causes master to exit" clause reads like it was copied from
   * {@link #killMaster(ServerName)}; confirm the mini/local behaviour against the implementations.
   * @param serverName the journalnode to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killJournalNode(ServerName serverName) throws IOException;

  /**
   * Stops the journalnode if this is a distributed cluster, otherwise silently logs warning
   * message.
   * @param serverName the journalnode to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopJournalNode(ServerName serverName) throws IOException;

  /**
   * Wait for the specified journalnode to join the cluster
   * @param serverName the journalnode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForJournalNodeToStart(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Wait for the specified journalnode to stop
   * @param serverName the journalnode to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForJournalNodeToStop(ServerName serverName, long timeout)
    throws IOException;

  /**
   * Starts a new master on the given hostname or if this is a mini/local cluster, starts a master
   * locally.
   * @param hostname the hostname to start the master on
   * @param port     the port to start the master on
   * @throws IOException if something goes wrong
   */
  public abstract void startMaster(String hostname, int port) throws IOException;

  /**
   * Kills the master process if this is a distributed cluster, otherwise, this causes master to
   * exit doing basic clean up only.
   * @param serverName the master to kill
   * @throws IOException if something goes wrong
   */
  public abstract void killMaster(ServerName serverName) throws IOException;

  /**
   * Stops the given master, by attempting a gradual stop.
   * @param serverName the master to stop
   * @throws IOException if something goes wrong
   */
  public abstract void stopMaster(ServerName serverName) throws IOException;

  /**
   * Wait for the specified master to stop the thread / process.
   * @param serverName the master to wait for
   * @param timeout    the timeout limit
   * @throws IOException if something goes wrong or timeout occurs
   */
  public abstract void waitForMasterToStop(ServerName serverName, long timeout) throws IOException;

  /**
   * Blocks until there is an active master and that master has completed initialization. This is
   * {@link #waitForActiveAndReadyMaster(long)} called with {@link Long#MAX_VALUE} as the timeout.
   * @return true if an active master becomes available. false if there are no masters left.
   * @throws IOException if something goes wrong or timeout occurs
   */
  public boolean waitForActiveAndReadyMaster() throws IOException {
    return waitForActiveAndReadyMaster(Long.MAX_VALUE);
  }

  /**
   * Blocks until there is an active master and that master has completed initialization.
   * @param timeout the timeout limit in ms
   * @return true if an active master becomes available. false if there are no masters left.
   */
  public abstract boolean waitForActiveAndReadyMaster(long timeout) throws IOException;

  /**
   * Wait for HBase Cluster to shut down.
   */
  public abstract void waitUntilShutDown() throws IOException;

  /**
   * Shut down the HBase cluster
   */
  public abstract void shutdown() throws IOException;

  /**
   * Restores the cluster to its initial state if this is a real cluster, otherwise does nothing.
   * This is a best effort restore. If the servers are not reachable, or insufficient permissions,
   * etc. restoration might be partial.
   * <p>
   * Delegates to {@link #restoreClusterMetrics(ClusterMetrics)} with the value returned by
   * {@link #getInitialClusterMetrics()}.
   * @return whether restoration is complete
   */
  public boolean restoreInitialStatus() throws IOException {
    return restoreClusterMetrics(getInitialClusterMetrics());
  }

  /**
   * Restores the cluster to given state if this is a real cluster, otherwise does nothing. This is
   * a best effort restore. If the servers are not reachable, or insufficient permissions, etc.
   * restoration might be partial.
   * <p>
   * The implementation in this class ignores {@code desiredStatus} and always returns
   * {@code true}.
   * @param desiredStatus the cluster state to restore to
   * @return whether restoration is complete
   */
  public boolean restoreClusterMetrics(ClusterMetrics desiredStatus) throws IOException {
    return true;
  }

  /**
   * Get the ServerName of region server serving the first hbase:meta region
   */
  public ServerName getServerHoldingMeta() throws IOException {
    return getServerHoldingRegion(TableName.META_TABLE_NAME,
      RegionInfoBuilder.FIRST_META_REGIONINFO.getRegionName());
  }

  /**
   * Get the ServerName of region server serving the specified region
   * @param tn         Table name that has the region.
   * @param regionName Name of the region in bytes
   * @return ServerName that hosts the region or null
   */
  public abstract ServerName getServerHoldingRegion(final TableName tn, byte[] regionName)
    throws IOException;

  /**
   * Returns whether we are interacting with a distributed cluster as opposed to an in-process
   * mini/local cluster. The implementation in this class always returns {@code false}.
   */
  public boolean isDistributedCluster() {
    return false;
  }

  /**
   * Closes all the resources held open for this cluster. Note that this call does not shutdown the
   * cluster.
   * @see #shutdown()
   */
  @Override
  public abstract void close() throws IOException;

  /**
   * Wait for the namenode. The implementation in this class does nothing.
   */
  public void waitForNamenodeAvailable() throws InterruptedException {
  }

  /**
   * Wait for datanodes to be registered. The implementation in this class does nothing.
   * @param nbDN presumably the number of datanodes to wait for; unused here - confirm against
   *             overriding implementations
   */
  public void waitForDatanodesRegistered(int nbDN) throws Exception {
  }
}