diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java index 5e28859..31229e5 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientScanner.java @@ -18,7 +18,9 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.LinkedList; +import java.util.concurrent.ExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -34,6 +36,7 @@ import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.NotServingRegionException; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownScannerException; +import org.apache.hadoop.hbase.client.RpcRetryingCallerFactory; import org.apache.hadoop.hbase.exceptions.OutOfOrderScannerNextException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.MapReduceProtos; @@ -54,23 +57,70 @@ public class ClientScanner extends AbstractClientScanner { // Current region scanner is against. Gets cleared if current region goes // wonky: e.g. if it splits on us. protected HRegionInfo currentRegion = null; - protected ScannerCallable callable = null; + protected ScannerCallableWithReplicas callable = null; protected final LinkedList cache = new LinkedList(); protected final int caching; protected long lastNext; // Keep lastResult returned successfully in case we have to reset scanner. protected Result lastResult = null; protected final long maxScannerResultSize; - private final HConnection connection; + private final ClusterConnection connection; private final TableName tableName; private final int scannerTimeout; protected boolean scanMetricsPublished = false; protected RpcRetryingCaller caller; + protected Configuration conf; + //The timeout on the primary. Applicable if there are multiple replicas for a region + //In that case, we will only wait for this much timeout on the primary before going + //to the replicas and trying the same scan. Note that the retries will still happen + //on each replica and the first successful results will be taken. A timeout of 0 is + //disallowed. + protected final int primaryOperationTimeout; + private int retries; + protected final ExecutorService pool; /** - * Create a new ClientScanner for the specified table. An HConnection will be + * Create a new ClientScanner for the specified table. + * Note that the passed {@link Scan}'s start row maybe changed. + * + * @param conf + * @param scan + * @param tableName + * @param connection + * @param pool + * @param primaryOperationTimeout + * @throws IOException + */ + public ClientScanner(final Configuration conf, final Scan scan, + final TableName tableName, ClusterConnection connection, + ExecutorService pool, int primaryOperationTimeout) + throws IOException { + this(conf, scan, tableName, connection, + new RpcRetryingCallerFactory(conf), pool, primaryOperationTimeout); + } + + /** + * Create a new ClientScanner for the specified table. + * Note that the passed {@link Scan}'s start row maybe changed. 
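The constructor overloads above thread an ExecutorService and a primaryOperationTimeout (in microseconds) through to the scanner so scan RPCs can be raced against secondary replicas. A minimal usage sketch, assuming code living in the org.apache.hadoop.hbase.client package (ConnectionManager.getConnectionInternal is package-private there) and a hypothetical table "t1"; not part of the patch:

import java.io.IOException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.TableName;

// Assumed to live in org.apache.hadoop.hbase.client to reach the
// package-private connection factory used by the patch.
class ReplicaScanSketch {
  static void scanWithReplicas() throws IOException {
    Configuration conf = HBaseConfiguration.create();
    ExecutorService pool = Executors.newCachedThreadPool(); // drives replica RPCs
    Scan scan = new Scan();
    scan.setConsistency(Consistency.TIMELINE); // permit secondary replica reads
    // 1,000,000 microseconds = 1s grace on the primary before replicas are
    // tried; a timeout of zero is disallowed by the new code.
    ClientScanner scanner = new ClientScanner(conf, scan, TableName.valueOf("t1"),
        ConnectionManager.getConnectionInternal(conf), pool, 1000000);
    try {
      for (Result r : scanner) {
        // r.isStale() is expected to be true when served by a secondary replica
      }
    } finally {
      scanner.close();
      pool.shutdown();
    }
  }
}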
+ * + * @param conf + * @param scan + * @param tableName + * @param connection + * @param rpcFactory + * @throws IOException + */ + public ClientScanner(final Configuration conf, final Scan scan, + final TableName tableName, ClusterConnection connection, + RpcRetryingCallerFactory rpcFactory) + throws IOException { + this(conf, scan, tableName, connection, rpcFactory, null, 0); + } + + /** + * Create a new ClientScanner for the specified table. A ClusterConnection will be * retrieved using the passed Configuration. - * Note that the passed {@link Scan}'s start row maybe changed changed. + * Note that the passed {@link Scan}'s start row maybe changed. * * @param conf The {@link Configuration} to use. * @param scan {@link Scan} to use in this scanner @@ -79,7 +129,7 @@ public class ClientScanner extends AbstractClientScanner { */ public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName) throws IOException { - this(conf, scan, tableName, HConnectionManager.getConnection(conf)); + this(conf, scan, tableName, ConnectionManager.getConnectionInternal(conf)); } /** @@ -94,7 +144,7 @@ public class ClientScanner extends AbstractClientScanner { /** * Create a new ClientScanner for the specified table - * Note that the passed {@link Scan}'s start row maybe changed changed. + * Note that the passed {@link Scan}'s start row maybe changed. * * @param conf The {@link Configuration} to use. * @param scan {@link Scan} to use in this scanner @@ -103,8 +153,8 @@ public class ClientScanner extends AbstractClientScanner { * @throws IOException */ public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName, - HConnection connection) throws IOException { - this(conf, scan, tableName, connection, new RpcRetryingCallerFactory(conf)); + ClusterConnection connection) throws IOException { + this(conf, scan, tableName, connection, new RpcRetryingCallerFactory(conf), null, 0); } /** @@ -112,8 +162,9 @@ public class ClientScanner extends AbstractClientScanner { */ @Deprecated public ClientScanner(final Configuration conf, final Scan scan, final byte [] tableName, - HConnection connection) throws IOException { - this(conf, scan, TableName.valueOf(tableName), connection, new RpcRetryingCallerFactory(conf)); + ClusterConnection connection) throws IOException { + this(conf, scan, TableName.valueOf(tableName), connection, new RpcRetryingCallerFactory(conf), + null, 0); } /** @@ -126,7 +177,8 @@ public class ClientScanner extends AbstractClientScanner { * @throws IOException */ public ClientScanner(final Configuration conf, final Scan scan, final TableName tableName, - HConnection connection, RpcRetryingCallerFactory rpcFactory) throws IOException { + ClusterConnection connection, RpcRetryingCallerFactory rpcFactory, ExecutorService pool, + int primaryOperationTimeout) throws IOException { if (LOG.isTraceEnabled()) { LOG.trace("Scan table=" + tableName + ", startRow=" + Bytes.toStringBinary(scan.getStartRow())); @@ -135,6 +187,10 @@ public class ClientScanner extends AbstractClientScanner { this.tableName = tableName; this.lastNext = System.currentTimeMillis(); this.connection = connection; + this.pool = pool; + this.primaryOperationTimeout = primaryOperationTimeout; + this.retries = conf.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, + HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); if (scan.getMaxResultSize() > 0) { this.maxScannerResultSize = scan.getMaxResultSize(); } else { @@ -159,8 +215,8 @@ public class ClientScanner extends AbstractClientScanner { 
HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING); } - this.caller = rpcFactory. newCaller(); - + this.caller = rpcFactory. newCaller(); + this.conf = conf; initializeScannerInConstruction(); } @@ -169,7 +225,7 @@ public class ClientScanner extends AbstractClientScanner { nextScanner(this.caching, false); } - protected HConnection getConnection() { + protected ClusterConnection getConnection() { return this.connection; } @@ -186,10 +242,34 @@ public class ClientScanner extends AbstractClientScanner { return this.tableName; } + protected int getRetries() { + return this.retries; + } + + protected int getScannerTimeout() { + return this.scannerTimeout; + } + + protected Configuration getConf() { + return this.conf; + } + protected Scan getScan() { return scan; } + protected ExecutorService getPool() { + return pool; + } + + protected int getPrimaryOperationTimeout() { + return primaryOperationTimeout; + } + + protected int getCaching() { + return caching; + } + protected long getTimestamp() { return lastNext; } @@ -210,6 +290,13 @@ public class ClientScanner extends AbstractClientScanner { return false; //unlikely. } + private boolean possiblyNextScanner(int nbRows, final boolean done) throws IOException { + // If we have just switched replica, don't go to the next scanner yet. Rather, try + // the scanner operations on the new replica. + if (callable != null && callable.switchedToADifferentReplica()) return true; + return nextScanner(nbRows, done); + } + /* * Gets a scanner for the next region. If this.currentRegion != null, then * we will move to the endrow of this.currentRegion. Else we will get @@ -224,7 +311,7 @@ public class ClientScanner extends AbstractClientScanner { // Close the previous scanner if it's open if (this.callable != null) { this.callable.setClose(); - this.caller.callWithRetries(callable); + call(scan, callable, caller); this.callable = null; } @@ -261,7 +348,7 @@ public class ClientScanner extends AbstractClientScanner { callable = getScannerCallable(localStartKey, nbRows); // Open a scanner on the region server starting at the // beginning of the region - this.caller.callWithRetries(callable); + call(scan, callable, caller); this.currentRegion = callable.getHRegionInfo(); if (this.scanMetrics != null) { this.scanMetrics.countOfRegions.incrementAndGet(); @@ -273,14 +360,28 @@ public class ClientScanner extends AbstractClientScanner { return true; } + static Result[] call(Scan scan, ScannerCallableWithReplicas callable, + RpcRetryingCaller caller) + throws IOException, RuntimeException { + if (Thread.interrupted()) { + throw new InterruptedIOException(); + } + // callWithoutRetries is at this layer. Within the ScannerCallableWithReplicas, + // we do a callWithRetries + return caller.callWithoutRetries(callable); + } + @InterfaceAudience.Private - protected ScannerCallable getScannerCallable(byte [] localStartKey, + protected ScannerCallableWithReplicas getScannerCallable(byte [] localStartKey, int nbRows) { scan.setStartRow(localStartKey); ScannerCallable s = new ScannerCallable(getConnection(), getTable(), scan, this.scanMetrics); s.setCaching(nbRows); - return s; + ScannerCallableWithReplicas sr = new ScannerCallableWithReplicas(tableName, getConnection(), + s, pool, primaryOperationTimeout, scan, + retries, scannerTimeout, caching, conf, caller); + return sr; } /** @@ -326,17 +427,27 @@ public class ClientScanner extends AbstractClientScanner { // Skip only the first row (which was the last row of the last // already-processed batch). 
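The switch from callWithRetries to callWithoutRetries in the hunks that follow is the heart of the retry restructuring: the ClientScanner layer now makes a single attempt, and each per-replica RPC inside ScannerCallableWithReplicas retries on its own. A toy, JDK-only model of that layering (plain Java, not the patch's classes):

import java.io.IOException;

// Toy model of the two retry layers. The outer layer makes one attempt
// against a replica-aware callable; bounded retries live inside, per
// replica, so a slow or failing primary does not multiply the retry count.
class RetryLayeringSketch {
  interface Rpc<T> { T attempt() throws IOException; }

  // Inner layer: bounded retries against a single replica.
  static <T> T callWithRetries(Rpc<T> rpc, int retries) throws IOException {
    IOException last = null;
    for (int i = 0; i <= retries; i++) {
      try {
        return rpc.attempt();
      } catch (IOException e) {
        last = e; // remember and retry
      }
    }
    throw last;
  }

  // Outer layer: a single attempt; the callable fans out and retries internally.
  static <T> T callWithoutRetries(Rpc<T> rpc) throws IOException {
    return rpc.attempt();
  }
}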
callable.setCaching(1); - values = this.caller.callWithRetries(callable); + values = call(scan, callable, caller); callable.setCaching(this.caching); skipFirst = false; } // Server returns a null values if scanning is to stop. Else, // returns an empty array if scanning is to go on and we've just // exhausted current region. - values = this.caller.callWithRetries(callable); + values = call(scan, callable, caller); if (skipFirst && values != null && values.length == 1) { skipFirst = false; // Already skipped, unset it before scanning again - values = this.caller.callWithRetries(callable); + values = call(scan, callable, caller); + } + // When the replica switch happens, we need to do certain operations + // again. The callable will openScanner but we need to pick up from + // there. Bypass the rest of the loop and let the catch-up happen in + // the beginning of the loop as it happens for the cases where we see + // exceptions + if (values == null && callable.switchedToADifferentReplica()) { + skipFirst = true; + this.currentRegion = callable.getHRegionInfo(); + continue; } retryAfterOutOfOrderException = true; } catch (DoNotRetryIOException e) { @@ -410,7 +521,8 @@ public class ClientScanner extends AbstractClientScanner { } } // Values == null means server-side filter has determined we must STOP - } while (remainingResultSize > 0 && countdown > 0 && nextScanner(countdown, values == null)); + } while (remainingResultSize > 0 && countdown > 0 && + possiblyNextScanner(countdown, values == null)); } if (cache.size() > 0) { @@ -428,7 +540,7 @@ public class ClientScanner extends AbstractClientScanner { if (callable != null) { callable.setClose(); try { - this.caller.callWithRetries(callable); + call(scan, callable, caller); } catch (IOException e) { // We used to catch this error, interpret, and rethrow. 
However, we // have since decided that it's not nice for a scanner's close to diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientSmallScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientSmallScanner.java index a17be55..ca2f431 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientSmallScanner.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClientSmallScanner.java @@ -19,8 +19,8 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; -import java.util.ArrayList; -import java.util.LinkedList; +import java.io.InterruptedIOException; +import java.util.concurrent.ExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -29,7 +29,6 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ipc.PayloadCarryingRpcController; @@ -53,7 +52,7 @@ import com.google.protobuf.ServiceException; @InterfaceStability.Evolving public class ClientSmallScanner extends ClientScanner { private final Log LOG = LogFactory.getLog(this.getClass()); - private RegionServerCallable smallScanCallable = null; + private ScannerCallableWithReplicas smallScanCallable = null; // When fetching results from server, skip the first result if it has the same // row with this one private byte[] skipRowOfFirstResult = null; @@ -70,7 +69,7 @@ public class ClientSmallScanner extends ClientScanner { */ public ClientSmallScanner(final Configuration conf, final Scan scan, final TableName tableName) throws IOException { - this(conf, scan, tableName, HConnectionManager.getConnection(conf)); + this(conf, scan, tableName, ConnectionManager.getConnectionInternal(conf)); } /** @@ -84,11 +83,28 @@ public class ClientSmallScanner extends ClientScanner { * @throws IOException */ public ClientSmallScanner(final Configuration conf, final Scan scan, - final TableName tableName, HConnection connection) throws IOException { + final TableName tableName, ClusterConnection connection) throws IOException { this(conf, scan, tableName, connection, new RpcRetryingCallerFactory(conf)); } /** + * Create a new ClientSmallScanner for the specified table. Note that the passed + * {@link Scan} 's start row maybe changed. + * @param conf + * @param scan + * @param tableName + * @param connection + * @param pool + * @param primaryOperationTimeout + * @throws IOException + */ + public ClientSmallScanner(final Configuration conf, final Scan scan, + final TableName tableName, ClusterConnection connection, ExecutorService pool, + int primaryOperationTimeout) throws IOException { + super(conf, scan, tableName, connection, pool, primaryOperationTimeout); + } + + /** * Create a new ShortClientScanner for the specified table Note that the * passed {@link Scan}'s start row maybe changed changed. 
* @@ -100,7 +116,7 @@ public class ClientSmallScanner extends ClientScanner { * @throws IOException */ public ClientSmallScanner(final Configuration conf, final Scan scan, - final TableName tableName, HConnection connection, + final TableName tableName, ClusterConnection connection, RpcRetryingCallerFactory rpcFactory) throws IOException { super(conf, scan, tableName, connection, rpcFactory); } @@ -160,27 +176,50 @@ public class ClientSmallScanner extends ClientScanner { return true; } - private RegionServerCallable getSmallScanCallable( + private ScannerCallableWithReplicas getSmallScanCallable( byte[] localStartKey, final int cacheNum) { this.scan.setStartRow(localStartKey); - RegionServerCallable callable = new RegionServerCallable( - getConnection(), getTable(), scan.getStartRow()) { - public Result[] call() throws IOException { - ScanRequest request = RequestConverter.buildScanRequest(getLocation() - .getRegionInfo().getRegionName(), scan, cacheNum, true); - ScanResponse response = null; - PayloadCarryingRpcController controller = new PayloadCarryingRpcController(); - try { - controller.setPriority(getTableName()); - response = getStub().scan(controller, request); - return ResponseConverter.getResults(controller.cellScanner(), - response); - } catch (ServiceException se) { - throw ProtobufUtil.getRemoteException(se); - } + SmallScannerCallable s = new SmallScannerCallable(0, cacheNum); + ScannerCallableWithReplicas scannerCallableWithReplicas = + new ScannerCallableWithReplicas(getTable(), getConnection(), + s, getPool(), getPrimaryOperationTimeout(), getScan(), getRetries(), + getScannerTimeout(), cacheNum, conf, caller); + return scannerCallableWithReplicas; + } + + class SmallScannerCallable extends ScannerCallable { + public SmallScannerCallable(int id, int caching) { + super(ClientSmallScanner.this.getConnection(), ClientSmallScanner.this.getTable(), + ClientSmallScanner.this.getScan(), null, id); + this.setCaching(caching); + } + + @Override + public Result[] call() throws IOException { + if (Thread.interrupted()) { + throw new InterruptedIOException(); } - }; - return callable; + ScanRequest request = RequestConverter.buildScanRequest(getLocation() + .getRegionInfo().getRegionName(), scan, getCaching(), true); + ScanResponse response = null; + PayloadCarryingRpcController controller = new PayloadCarryingRpcController(); + try { + controller.setPriority(getTableName()); + response = getStub().scan(controller, request); + return ResponseConverter.getResults(controller.cellScanner(), + response); + } catch (ServiceException se) { + throw ProtobufUtil.getRemoteException(se); + } + } + + @Override + public ScannerCallable getScannerCallableForReplica(int id) { + return new SmallScannerCallable(id, this.getCaching()); + } + + @Override + public void setClose(){} } @Override @@ -201,7 +240,9 @@ public class ClientSmallScanner extends ClientScanner { // Server returns a null values if scanning is to stop. Else, // returns an empty array if scanning is to go on and we've just // exhausted current region. - values = this.caller.callWithRetries(smallScanCallable); + // callWithoutRetries is at this layer. 
Within the ScannerCallableWithReplicas, + // we do a callWithRetries + values = this.caller.callWithoutRetries(smallScanCallable); this.currentRegion = smallScanCallable.getHRegionInfo(); long currentTime = System.currentTimeMillis(); if (this.scanMetrics != null) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClusterConnection.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClusterConnection.java index 2b8f886..aefdba7 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClusterConnection.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ClusterConnection.java @@ -34,8 +34,9 @@ import org.apache.hadoop.hbase.protobuf.generated.MasterProtos.MasterService; /** Internal methods on HConnection that should not be used by user code. */ @InterfaceAudience.Private -// NOTE: DO NOT make this class public. It was made package-private on purpose. -interface ClusterConnection extends HConnection { +// NOTE: Although this class is public, this class is meant to be used directly from internal +// classes and unit tests only. +public interface ClusterConnection extends HConnection { /** @return - true if the master server is running */ @Override diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java index b81b5ba..e065955 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/HTable.java @@ -136,6 +136,7 @@ public class HTable implements HTableInterface { private final boolean cleanupConnectionOnClose; // close the connection in close() private Consistency defaultConsistency = Consistency.STRONG; private int primaryCallTimeoutMicroSecond; + private int replicaCallTimeoutMicroSecondScan; /** The Async process for puts with autoflush set to false or multiputs */ @@ -273,10 +274,14 @@ public class HTable implements HTableInterface { this.connection = ConnectionManager.getConnectionInternal(conf); this.configuration = conf; this.pool = pool; + if (pool == null) { + this.pool = getDefaultExecutor(conf); + this.cleanupPoolOnClose = true; + } else { + this.cleanupPoolOnClose = false; + } this.tableName = tableName; - this.cleanupPoolOnClose = false; this.cleanupConnectionOnClose = true; - this.finishSetup(); } @@ -323,10 +328,16 @@ public class HTable implements HTableInterface { throw new IllegalArgumentException("Connection is null or closed."); } this.tableName = tableName; - this.cleanupPoolOnClose = this.cleanupConnectionOnClose = false; + this.cleanupConnectionOnClose = false; this.connection = connection; this.configuration = connection.getConfiguration(); this.pool = pool; + if (pool == null) { + this.pool = getDefaultExecutor(this.configuration); + this.cleanupPoolOnClose = true; + } else { + this.cleanupPoolOnClose = false; + } this.finishSetup(); } @@ -360,6 +371,8 @@ public class HTable implements HTableInterface { HConstants.DEFAULT_HBASE_CLIENT_SCANNER_CACHING); this.primaryCallTimeoutMicroSecond = this.configuration.getInt("hbase.client.primaryCallTimeout.get", 10000); // 10 ms + this.replicaCallTimeoutMicroSecondScan = + this.configuration.getInt("hbase.client.replicaCallTimeout.scan", 1000000); // 1000 ms this.retries = configuration.getInt(HConstants.HBASE_CLIENT_RETRIES_NUMBER, HConstants.DEFAULT_HBASE_CLIENT_RETRIES_NUMBER); @@ -751,27 +764,30 @@ public class HTable implements HTableInterface { return rpcCallerFactory. 
newCaller().callWithRetries(callable, this.operationTimeout); } - /** - * {@inheritDoc} - */ + /** + * The underlying {@link HTable} must not be closed. + * {@link HTableInterface#getScanner(Scan)} has other usage details. + */ @Override public ResultScanner getScanner(final Scan scan) throws IOException { if (scan.getCaching() <= 0) { scan.setCaching(getScannerCaching()); } + if (scan.isSmall() && !scan.isReversed()) { return new ClientSmallScanner(getConfiguration(), scan, getName(), - this.connection); + this.connection, pool, replicaCallTimeoutMicroSecondScan); } else if (scan.isReversed()) { return new ReversedClientScanner(getConfiguration(), scan, getName(), - this.connection); + this.connection, pool, replicaCallTimeoutMicroSecondScan); } return new ClientScanner(getConfiguration(), scan, - getName(), this.connection); + getName(), this.connection, pool, replicaCallTimeoutMicroSecondScan); } /** - * {@inheritDoc} + * The underlying {@link HTable} must not be closed. + * {@link HTableInterface#getScanner(byte[])} has other usage details. */ @Override public ResultScanner getScanner(byte [] family) throws IOException { @@ -781,7 +797,8 @@ public class HTable implements HTableInterface { } /** - * {@inheritDoc} + * The underlying {@link HTable} must not be closed. + * {@link HTableInterface#getScanner(byte[], byte[])} has other usage details. */ @Override public ResultScanner getScanner(byte [] family, byte [] qualifier) @@ -1330,6 +1347,11 @@ public class HTable implements HTableInterface { flushCommits(); if (cleanupPoolOnClose) { this.pool.shutdown(); + try { + this.pool.awaitTermination(60, TimeUnit.SECONDS); + } catch (InterruptedException e) { + LOG.warn("Sleep interrupted"); + } } if (cleanupConnectionOnClose) { if (this.connection != null) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java index 795a6f9..b011c26 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/MultiServerCallable.java @@ -54,7 +54,7 @@ class MultiServerCallable extends RegionServerCallable { private final MultiAction multiAction; private final boolean cellBlock; - MultiServerCallable(final HConnection connection, final TableName tableName, + MultiServerCallable(final ClusterConnection connection, final TableName tableName, final ServerName location, final MultiAction multi) { super(connection, tableName, null); this.multiAction = multi; diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedClientScanner.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedClientScanner.java index 618b3b3..c1940ae 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedClientScanner.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedClientScanner.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; import java.util.Arrays; +import java.util.concurrent.ExecutorService; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -27,7 +28,9 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionLocation; import 
org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.metrics.ScanMetrics; import org.apache.hadoop.hbase.util.Bytes; /** @@ -50,17 +53,36 @@ public class ReversedClientScanner extends ClientScanner { * @throws IOException */ public ReversedClientScanner(Configuration conf, Scan scan, - TableName tableName, HConnection connection) throws IOException { + TableName tableName, ClusterConnection connection) throws IOException { super(conf, scan, tableName, connection); } + /** + * Create a new ReversibleClientScanner for the specified table Note that the + * passed {@link Scan}'s start row maybe changed. + * @param conf + * @param scan + * @param tableName + * @param connection + * @param pool + * @param primaryOperationTimeout + * @throws IOException + */ + public ReversedClientScanner(Configuration conf, Scan scan, + TableName tableName, ClusterConnection connection, ExecutorService pool, + int primaryOperationTimeout) throws IOException { + super(conf, scan, tableName, connection, pool, primaryOperationTimeout); + } + @Override protected boolean nextScanner(int nbRows, final boolean done) throws IOException { // Close the previous scanner if it's open if (this.callable != null) { this.callable.setClose(); - this.caller.callWithRetries(callable); + // callWithoutRetries is at this layer. Within the ScannerCallableWithReplicas, + // we do a callWithRetries + this.caller.callWithoutRetries(callable); this.callable = null; } @@ -108,7 +130,9 @@ public class ReversedClientScanner extends ClientScanner { callable = getScannerCallable(localStartKey, nbRows, locateStartRow); // Open a scanner on the region server starting at the // beginning of the region - this.caller.callWithRetries(callable); + // callWithoutRetries is at this layer. Within the ScannerCallableWithReplicas, + // we do a callWithRetries + this.caller.callWithoutRetries(callable); this.currentRegion = callable.getHRegionInfo(); if (this.scanMetrics != null) { this.scanMetrics.countOfRegions.incrementAndGet(); @@ -120,13 +144,16 @@ public class ReversedClientScanner extends ClientScanner { return true; } - protected ScannerCallable getScannerCallable(byte[] localStartKey, + protected ScannerCallableWithReplicas getScannerCallable(byte[] localStartKey, int nbRows, byte[] locateStartRow) { scan.setStartRow(localStartKey); ScannerCallable s = new ReversedScannerCallable(getConnection(), getTable(), scan, this.scanMetrics, locateStartRow); s.setCaching(nbRows); - return s; + ScannerCallableWithReplicas sr = new ScannerCallableWithReplicas(getTable(), getConnection(), + s, pool, primaryOperationTimeout, scan, + getRetries(), getScannerTimeout(), caching, getConf(), caller); + return sr; } @Override @@ -167,5 +194,4 @@ public class ReversedClientScanner extends ClientScanner { return closestFrontRow; } } - } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedScannerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedScannerCallable.java index 487777f..bdcd08d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedScannerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ReversedScannerCallable.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.ArrayList; import java.util.List; @@ -27,6 +28,7 @@ import org.apache.hadoop.classification.InterfaceStability; import org.apache.hadoop.hbase.DoNotRetryIOException; import 
org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionLocation; +import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.client.metrics.ScanMetrics; import org.apache.hadoop.hbase.util.Bytes; @@ -52,9 +54,23 @@ public class ReversedScannerCallable extends ScannerCallable { * @param scanMetrics * @param locateStartRow The start row for locating regions */ - public ReversedScannerCallable(HConnection connection, TableName tableName, + public ReversedScannerCallable(ClusterConnection connection, TableName tableName, Scan scan, ScanMetrics scanMetrics, byte[] locateStartRow) { - super(connection, tableName, scan, scanMetrics); + this (connection, tableName, scan, scanMetrics, locateStartRow, 0); + } + + /** + * + * @param connection + * @param tableName + * @param scan + * @param scanMetrics + * @param locateStartRow + * @param id the replicaId + */ + public ReversedScannerCallable(ClusterConnection connection, TableName tableName, + Scan scan, ScanMetrics scanMetrics, byte[] locateStartRow, int id) { + super(connection, tableName, scan, scanMetrics, id); this.locateStartRow = locateStartRow; } @@ -64,10 +80,15 @@ public class ReversedScannerCallable extends ScannerCallable { */ @Override public void prepare(boolean reload) throws IOException { + if (Thread.interrupted()) { + throw new InterruptedIOException(); + } if (!instantiated || reload) { if (locateStartRow == null) { // Just locate the region with the row - this.location = connection.getRegionLocation(tableName, row, reload); + RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(reload, id, + getConnection(), tableName, row); + this.location = id < rl.size() ? rl.getRegionLocation(id) : null; if (this.location == null) { throw new IOException("Failed to find location, tableName=" + tableName + ", row=" + Bytes.toString(row) + ", reload=" @@ -123,9 +144,10 @@ public class ReversedScannerCallable extends ScannerCallable { List regionList = new ArrayList(); byte[] currentKey = startKey; do { - HRegionLocation regionLocation = connection.getRegionLocation(tableName, - currentKey, reload); - if (regionLocation.getRegionInfo().containsRow(currentKey)) { + RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(reload, id, + getConnection(), tableName, currentKey); + HRegionLocation regionLocation = id < rl.size() ? rl.getRegionLocation(id) : null; + if (regionLocation != null && regionLocation.getRegionInfo().containsRow(currentKey)) { regionList.add(regionLocation); } else { throw new DoNotRetryIOException("Does hbase:meta exist hole? 
Locating row " @@ -138,4 +160,11 @@ public class ReversedScannerCallable extends ScannerCallable { return regionList; } + @Override + public ScannerCallable getScannerCallableForReplica(int id) { + ReversedScannerCallable r = new ReversedScannerCallable(this.cConnection, this.tableName, + this.getScan(), this.scanMetrics, this.locateStartRow, id); + r.setCaching(this.getCaching()); + return r; + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java index 6c51a70..507896a 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCaller.java @@ -191,7 +191,7 @@ public class RpcRetryingCaller { if (t2 instanceof IOException) { throw (IOException)t2; } else { - throw new RuntimeException(t2); + t2.printStackTrace(); throw new RuntimeException(t2); } } finally { afterCall(); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java index c26629d..963d155 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/RpcRetryingCallerWithReadReplicas.java @@ -21,8 +21,10 @@ package org.apache.hadoop.hbase.client; +import java.io.ByteArrayOutputStream; import java.io.IOException; import java.io.InterruptedIOException; +import java.io.PrintWriter; import java.util.ArrayList; import java.util.Collections; import java.util.List; @@ -45,6 +47,7 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.util.BoundedCompletionService; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; +import org.apache.hadoop.util.ReflectionUtils; /** * Caller that goes to replica if the primary region does no answer within a configurable @@ -105,7 +108,7 @@ public class RpcRetryingCallerWithReadReplicas { } if (reload || location == null) { - RegionLocations rl = getRegionLocations(false, id); + RegionLocations rl = getRegionLocations(false, id, cConnection, tableName, get.getRow()); location = id < rl.size() ? rl.getRegionLocation(id) : null; } @@ -171,7 +174,8 @@ public class RpcRetryingCallerWithReadReplicas { */ public synchronized Result call() throws DoNotRetryIOException, InterruptedIOException, RetriesExhaustedException { - RegionLocations rl = getRegionLocations(true, RegionReplicaUtil.DEFAULT_REPLICA_ID); + RegionLocations rl = getRegionLocations(true, RegionReplicaUtil.DEFAULT_REPLICA_ID, + cConnection, tableName, get.getRow()); BoundedCompletionService cs = new BoundedCompletionService(pool, rl.size()); List exceptions = null; @@ -225,7 +229,7 @@ public class RpcRetryingCallerWithReadReplicas { } if (exceptions != null && !exceptions.isEmpty()) { - throwEnrichedException(exceptions.get(0)); // just rethrow the first exception for now. + throwEnrichedException(exceptions.get(0), retries, toString()); // just rethrow the first exception for now. } return null; // unreachable } @@ -234,7 +238,7 @@ public class RpcRetryingCallerWithReadReplicas { * Extract the real exception from the ExecutionException, and throws what makes more * sense. 
*/ - private void throwEnrichedException(ExecutionException e) + static void throwEnrichedException(ExecutionException e, int retries, String str) throws RetriesExhaustedException, DoNotRetryIOException { Throwable t = e.getCause(); assert t != null; // That's what ExecutionException is about: holding an exception @@ -249,7 +253,7 @@ public class RpcRetryingCallerWithReadReplicas { RetriesExhaustedException.ThrowableWithExtraContext qt = new RetriesExhaustedException.ThrowableWithExtraContext(t, - EnvironmentEdgeManager.currentTimeMillis(), toString()); + EnvironmentEdgeManager.currentTimeMillis(), str); List exceptions = Collections.singletonList(qt); @@ -277,11 +281,12 @@ public class RpcRetryingCallerWithReadReplicas { return max - min + 1; } - private RegionLocations getRegionLocations(boolean useCache, int replicaId) + static RegionLocations getRegionLocations(boolean useCache, int replicaId, + ClusterConnection cConnection, TableName tableName, byte[] row) throws RetriesExhaustedException, DoNotRetryIOException, InterruptedIOException { RegionLocations rl; try { - rl = cConnection.locateRegion(tableName, get.getRow(), useCache, true, replicaId); + rl = cConnection.locateRegion(tableName, row, useCache, true, replicaId); } catch (DoNotRetryIOException e) { throw e; } catch (RetriesExhaustedException e) { diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java index f7d1c51..a5c6622 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/Scan.java @@ -115,7 +115,7 @@ public class Scan extends Query { private Map> familyMap = new TreeMap>(Bytes.BYTES_COMPARATOR); private Boolean loadColumnFamiliesOnDemand = null; - private Consistency consistency = null; + private Consistency consistency = Consistency.STRONG; /** * Set it true for small scan to get better performance diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java index 1a3d7a7..b6ec9a4 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallable.java @@ -19,6 +19,7 @@ package org.apache.hadoop.hbase.client; import java.io.IOException; +import java.io.InterruptedIOException; import java.net.UnknownHostException; import org.apache.commons.logging.Log; @@ -28,11 +29,14 @@ import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.Cell; import org.apache.hadoop.hbase.CellScanner; import org.apache.hadoop.hbase.DoNotRetryIOException; +import org.apache.hadoop.hbase.HBaseIOException; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.KeyValueUtil; import org.apache.hadoop.hbase.NotServingRegionException; +import org.apache.hadoop.hbase.RegionLocations; import org.apache.hadoop.hbase.RemoteExceptionHandler; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.UnknownScannerException; import org.apache.hadoop.hbase.client.metrics.ScanMetrics; @@ -61,15 +65,17 @@ public class ScannerCallable extends RegionServerCallable { public static final String LOG_SCANNER_ACTIVITY = "hbase.client.log.scanner.activity"; public static final Log LOG = LogFactory.getLog(ScannerCallable.class); - 
private long scannerId = -1L; + protected long scannerId = -1L; protected boolean instantiated = false; - private boolean closed = false; + protected boolean closed = false; private Scan scan; private int caching = 1; + protected final ClusterConnection cConnection; protected ScanMetrics scanMetrics; private boolean logScannerActivity = false; private int logCutOffLatency = 1000; private static String myAddress; + protected final int id; static { try { myAddress = DNS.getDefaultHost("default", "default"); @@ -89,9 +95,23 @@ public class ScannerCallable extends RegionServerCallable { * @param scanMetrics the ScanMetrics to used, if it is null, ScannerCallable * won't collect metrics */ - public ScannerCallable (HConnection connection, TableName tableName, Scan scan, + public ScannerCallable (ClusterConnection connection, TableName tableName, Scan scan, ScanMetrics scanMetrics) { + this(connection, tableName, scan, scanMetrics, 0); + } + /** + * + * @param connection + * @param tableName + * @param scan + * @param scanMetrics + * @param id the replicaId + */ + public ScannerCallable (ClusterConnection connection, TableName tableName, Scan scan, + ScanMetrics scanMetrics, int id) { super(connection, tableName, scan.getStartRow()); + this.id = id; + this.cConnection = connection; this.scan = scan; this.scanMetrics = scanMetrics; Configuration conf = connection.getConfiguration(); @@ -100,10 +120,10 @@ public class ScannerCallable extends RegionServerCallable { } /** - * @deprecated Use {@link #ScannerCallable(HConnection, TableName, Scan, ScanMetrics)} + * @deprecated Use {@link #ScannerCallable(ClusterConnection, TableName, Scan, ScanMetrics)} */ @Deprecated - public ScannerCallable (HConnection connection, final byte [] tableName, Scan scan, + public ScannerCallable (ClusterConnection connection, final byte [] tableName, Scan scan, ScanMetrics scanMetrics) { this(connection, TableName.valueOf(tableName), scan, scanMetrics); } @@ -114,8 +134,20 @@ public class ScannerCallable extends RegionServerCallable { */ @Override public void prepare(boolean reload) throws IOException { + if (Thread.interrupted()) { + throw new InterruptedIOException(); + } + RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(!reload, + id, getConnection(), getTableName(), getRow()); + location = id < rl.size() ? rl.getRegionLocation(id) : null; + if (location == null || location.getServerName() == null) { + // With this exception, there will be a retry. The location can be null for a replica + // when the table is created or after a split. 
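prepare() now resolves the location for this callable's specific replica id and, like call(), converts a pending thread interrupt into an InterruptedIOException before doing any RPC work. The interrupt check matters because ScannerCallableWithReplicas cancels losing replica calls with Future.cancel(true), and a blocking HBase RPC does not observe the interrupt flag by itself. A condensed sketch of that pattern, under those assumptions:

import java.io.InterruptedIOException;

// Each callable converts a pending interrupt into an InterruptedIOException
// before touching the wire, so a cancelled replica task stops promptly.
final class InterruptCheck {
  private InterruptCheck() {}

  static void throwIfInterrupted() throws InterruptedIOException {
    if (Thread.interrupted()) { // note: this also clears the interrupt flag
      throw new InterruptedIOException("Interrupted before making the scan RPC");
    }
  }
}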
+ throw new HBaseIOException("There is no location for replica id #" + id); + } + ServerName dest = location.getServerName(); + setStub(super.getConnection().getClient(dest)); if (!instantiated || reload) { - super.prepare(reload); checkIfRegionServerIsRemote(); instantiated = true; } @@ -148,6 +180,9 @@ public class ScannerCallable extends RegionServerCallable { */ @SuppressWarnings("deprecation") public Result [] call() throws IOException { + if (Thread.interrupted()) { + throw new InterruptedIOException(); + } if (closed) { if (scannerId != -1) { close(); @@ -340,6 +375,10 @@ public class ScannerCallable extends RegionServerCallable { return caching; } + public ClusterConnection getConnection() { + return cConnection; + } + /** * Set the number of rows that will be fetched on next * @param caching the number of rows for caching @@ -347,4 +386,11 @@ public class ScannerCallable extends RegionServerCallable { public void setCaching(int caching) { this.caching = caching; } + + public ScannerCallable getScannerCallableForReplica(int id) { + ScannerCallable s = new ScannerCallable(this.getConnection(), this.tableName, + this.getScan(), this.scanMetrics, id); + s.setCaching(this.caching); + return s; + } } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java new file mode 100644 index 0000000..139b767 --- /dev/null +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/client/ScannerCallableWithReplicas.java @@ -0,0 +1,314 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.client; + +import java.io.IOException; +import java.io.InterruptedIOException; +import java.util.ArrayList; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.Callable; +import java.util.concurrent.CancellationException; +import java.util.concurrent.ExecutionException; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Future; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.RegionLocations; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.util.BoundedCompletionService; +import org.apache.hadoop.hbase.util.Pair; +/** + * This class has the logic for handling scanners for regions with and without replicas. + * 1. A scan is attempted on the default (primary) region + * 2. 
The scanner sends all the RPCs to the default region until it is done, or, there + * is a timeout on the default (a timeout of zero is disallowed). + * 3. If there is a timeout in (2) above, scanner(s) is opened on the non-default replica(s) + * 4. The results from the first successful scanner are taken, and it is stored which server + * returned the results. + * 5. The next RPCs are done on the above stored server until it is done or there is a timeout, + * in which case, the other replicas are queried (as in (3) above). + * + */ + +@InterfaceAudience.Private +class ScannerCallableWithReplicas implements RetryingCallable { + private final Log LOG = LogFactory.getLog(this.getClass()); + volatile ScannerCallable currentScannerCallable; + AtomicBoolean replicaSwitched = new AtomicBoolean(false); + final ClusterConnection cConnection; + protected final ExecutorService pool; + protected final int timeBeforeReplicas; + private final Scan scan; + private final int retries; + private Result lastResult; + private final RpcRetryingCaller caller; + private final TableName tableName; + private Configuration conf; + private int scannerTimeout; + private Set outstandingCallables = new HashSet(); + + public ScannerCallableWithReplicas (TableName tableName, ClusterConnection cConnection, + ScannerCallable baseCallable, ExecutorService pool, int timeBeforeReplicas, Scan scan, + int retries, int scannerTimeout, int caching, Configuration conf, + RpcRetryingCaller caller) { + this.currentScannerCallable = baseCallable; + this.cConnection = cConnection; + this.pool = pool; + if (timeBeforeReplicas <= 0) { + throw new IllegalArgumentException("Invalid value of operation timeout on the primary"); + } + this.timeBeforeReplicas = timeBeforeReplicas; + this.scan = scan; + this.retries = retries; + this.tableName = tableName; + this.conf = conf; + this.scannerTimeout = scannerTimeout; + this.caller = caller; + } + + public void setClose() { + currentScannerCallable.setClose(); + } + + public void setCaching(int caching) { + currentScannerCallable.setCaching(caching); + } + + public int getCaching() { + return currentScannerCallable.getCaching(); + } + + public HRegionInfo getHRegionInfo() { + return currentScannerCallable.getHRegionInfo(); + } + + @Override + public Result [] call() throws IOException { + // If the active replica callable was closed somewhere, invoke the RPC to + // really close it. In the case of regular scanners, this applies. We make couple + // of RPCs to a RegionServer, and when that region is exhausted, we set + // the closed flag. Then an RPC is required to actually close the scanner. + if (currentScannerCallable != null && currentScannerCallable.closed) { + // For closing we target that exact scanner (and not do replica fallback like in + // the case of normal reads) + if (LOG.isDebugEnabled()) { + LOG.debug("Closing scanner " + currentScannerCallable.scannerId); + } + Result[] r = currentScannerCallable.call(); + currentScannerCallable = null; + return r; + } + // We need to do the following: + //1. When a scan goes out to a certain replica (default or not), we need to + // continue to hit that until there is a failure. So store the last successfully invoked + // replica + //2. 
We should close the "losing" scanners (scanners other than the ones we hear back + // from first) + // + RegionLocations rl = RpcRetryingCallerWithReadReplicas.getRegionLocations(true, + RegionReplicaUtil.DEFAULT_REPLICA_ID, cConnection, tableName, + currentScannerCallable.getRow()); + BoundedCompletionService> cs = + new BoundedCompletionService>(pool, rl.size()); + + List exceptions = null; + int submitted = 0, completed = 0; + AtomicBoolean done = new AtomicBoolean(false); + replicaSwitched.set(false); + // submit call for the primary replica. + submitted += addCallsForCurrentReplica(cs, rl); + try { + // wait for the timeout to see whether the primary responds back + Future> f = cs.poll(timeBeforeReplicas, + TimeUnit.MICROSECONDS); // Yes, microseconds + if (f != null) { + Pair r = f.get(); + if (r != null && r.getSecond() != null) { + updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, cs); + } + return r == null ? null : r.getFirst(); //great we got a response + } + } catch (ExecutionException e) { + // the primary call failed with RetriesExhaustedException or DoNotRetryIOException + // but the secondaries might still succeed. Continue on the replica RPCs. + exceptions = new ArrayList(rl.size()); + exceptions.add(e); + completed++; + } catch (CancellationException e) { + throw new InterruptedIOException(e.getMessage()); + } catch (InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } + // submit call for the all of the secondaries at once + // TODO: this may be an overkill for large region replication + submitted += addCallsForOtherReplicas(cs, rl, 0, rl.size() - 1); + try { + while (completed < submitted) { + try { + Future> f = cs.take(); + Pair r = f.get(); + if (r != null && r.getSecond() != null) { + updateCurrentlyServingReplica(r.getSecond(), r.getFirst(), done, cs); + } + return r == null ? null : r.getFirst(); // great we got an answer + } catch (ExecutionException e) { + // if not cancel or interrupt, wait until all RPC's are done + // one of the tasks failed. Save the exception for later. + if (exceptions == null) exceptions = new ArrayList(rl.size()); + exceptions.add(e); + completed++; + } + } + } catch (CancellationException e) { + throw new InterruptedIOException(e.getMessage()); + } catch (InterruptedException e) { + throw new InterruptedIOException(e.getMessage()); + } finally { + // We get there because we were interrupted or because one or more of the + // calls succeeded or failed. In all case, we stop all our tasks. + cs.cancelAll(true); + } + + if (exceptions != null && !exceptions.isEmpty()) { + RpcRetryingCallerWithReadReplicas.throwEnrichedException(exceptions.get(0), + retries, toString()); // just rethrow the first exception for now. + } + return null; // unreachable + } + + private void updateCurrentlyServingReplica(ScannerCallable scanner, Result[] result, + AtomicBoolean done, BoundedCompletionService> cs) { + if (done.compareAndSet(false, true)) { + if (currentScannerCallable != scanner) replicaSwitched.set(true); + currentScannerCallable = scanner; + // store where to start the replica scanner from if we need to. 
+ if (result != null && result.length != 0) this.lastResult = result[result.length - 1]; + // if the current result is empty or null then we don't need to do any + // special thing when we go to a replica (if needed) + else this.lastResult = null; + if (LOG.isDebugEnabled()) { + LOG.debug("Setting current scanner as " + currentScannerCallable.scannerId + + " associated with " + currentScannerCallable.getHRegionInfo().getReplicaId()); + } + // close all outstanding replica scanners but the one we heard back from + outstandingCallables.remove(scanner); + for (ScannerCallable s : outstandingCallables) { + if (LOG.isDebugEnabled()) { + LOG.debug("Closing scanner " + s.scannerId + + " because this was slow and another replica succeeded"); + } + // Submit the "close" to the pool since this might take time, and we don't + // want to wait for the "close" to happen yet. The "wait" will happen when + // the table is closed (when the awaitTermination of the underlying pool is called) + s.setClose(); + RetryingRPC r = new RetryingRPC(s); + cs.submit(r); + } + } + } + + /** + * When a scanner switches in the middle of scanning (the 'next' call fails + * for example), the upper layer {@link ClientScanner} needs to know + * @return + */ + public boolean switchedToADifferentReplica() { + return replicaSwitched.get(); + } + + private int addCallsForCurrentReplica( + BoundedCompletionService> cs, RegionLocations rl) { + RetryingRPC retryingOnReplica = new RetryingRPC(currentScannerCallable); + outstandingCallables.add(currentScannerCallable); + cs.submit(retryingOnReplica); + return 1; + } + + private int addCallsForOtherReplicas( + BoundedCompletionService> cs, RegionLocations rl, int min, + int max) { + if (scan.getConsistency() == Consistency.STRONG) { + return 0; // not scheduling on other replicas for strong consistency + } + for (int id = min; id <= max; id++) { + if (currentScannerCallable.getHRegionInfo().getReplicaId() == id) { + continue; //this was already scheduled earlier + } + if (this.lastResult != null) { + currentScannerCallable.getScan().setStartRow(this.lastResult.getRow()); + } + ScannerCallable s = currentScannerCallable.getScannerCallableForReplica(id); + outstandingCallables.add(s); + RetryingRPC retryingOnReplica = new RetryingRPC(s); + cs.submit(retryingOnReplica); + } + return max - min + 1; + } + + class RetryingRPC implements Callable> { + final ScannerCallable callable; + + RetryingRPC(ScannerCallable callable) { + this.callable = callable; + } + + @Override + public Pair call() throws IOException { + // For the Consistency.STRONG (default case), we reuse the caller + // to keep compatibility with what is done in the past + // For the Consistency.TIMELINE case, we can't reuse the caller + // since we could be making parallel RPCs (caller.callWithRetries is synchronized + // and we can't invoke it multiple times at the same time) + RpcRetryingCaller caller = ScannerCallableWithReplicas.this.caller; + if (scan.getConsistency() == Consistency.TIMELINE) { + caller = new RpcRetryingCallerFactory(ScannerCallableWithReplicas.this.conf). 
+ newCaller(); + } + Result[] res = caller.callWithRetries(callable, scannerTimeout); + return new Pair(res, callable); + } + } + + @Override + public void prepare(boolean reload) throws IOException { + } + + @Override + public void throwable(Throwable t, boolean retrying) { + currentScannerCallable.throwable(t, retrying); + } + + @Override + public String getExceptionMessageAdditionalDetail() { + return currentScannerCallable.getExceptionMessageAdditionalDetail(); + } + + @Override + public long sleep(long pause, int tries) { + return currentScannerCallable.sleep(pause, tries); + } +} diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java index 76c9815..286ff0b 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ProtobufUtil.java @@ -898,6 +898,9 @@ public final class ProtobufUtil { if (scan.isReversed()) { scanBuilder.setReversed(scan.isReversed()); } + if (scan.getConsistency() == Consistency.TIMELINE) { + scanBuilder.setConsistency(toConsistency(scan.getConsistency())); + } return scanBuilder.build(); } @@ -977,6 +980,9 @@ public final class ProtobufUtil { if (proto.hasReversed()) { scan.setReversed(proto.getReversed()); } + if (proto.hasConsistency()) { + scan.setConsistency(toConsistency(proto.getConsistency())); + } return scan; } diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ResponseConverter.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ResponseConverter.java index e9d5070..3d3505d 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ResponseConverter.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/protobuf/ResponseConverter.java @@ -368,7 +368,7 @@ public final class ResponseConverter { } cells.add(cellScanner.current()); } - results[i] = Result.create(cells); + results[i] = Result.create(cells, null, response.getStale()); } else { // Result is pure pb. 
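ResponseConverter below now tags every cell-block Result with the response's stale flag via Result.create(cells, null, response.getStale()). On the client side that presumably surfaces as Result.isStale(); a hedged consumption sketch, assuming an open ResultScanner named scanner from a TIMELINE-consistency Scan:

// Hypothetical consumer of the stale marker now carried on each Result.
for (Result r : scanner) {
  if (r.isStale()) {
    // Served by a secondary replica: the data may lag the primary, so
    // callers needing read-your-writes semantics should treat it accordingly.
  }
}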
results[i] = ProtobufUtil.toResult(response.getResults(i)); diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java index 741bf84..12bcfde 100644 --- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java +++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ClientProtos.java @@ -17404,6 +17404,16 @@ public final class ClientProtos { */ org.apache.hadoop.hbase.protobuf.generated.ClientProtos.ResultOrBuilder getResultsOrBuilder( int index); + + // optional bool stale = 6; + /** + * optional bool stale = 6; + */ + boolean hasStale(); + /** + * optional bool stale = 6; + */ + boolean getStale(); } /** * Protobuf type {@code ScanResponse} @@ -17506,6 +17516,11 @@ public final class ClientProtos { results_.add(input.readMessage(org.apache.hadoop.hbase.protobuf.generated.ClientProtos.Result.PARSER, extensionRegistry)); break; } + case 48: { + bitField0_ |= 0x00000008; + stale_ = input.readBool(); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -17719,12 +17734,29 @@ public final class ClientProtos { return results_.get(index); } + // optional bool stale = 6; + public static final int STALE_FIELD_NUMBER = 6; + private boolean stale_; + /** + * optional bool stale = 6; + */ + public boolean hasStale() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + /** + * optional bool stale = 6; + */ + public boolean getStale() { + return stale_; + } + private void initFields() { cellsPerResult_ = java.util.Collections.emptyList(); scannerId_ = 0L; moreResults_ = false; ttl_ = 0; results_ = java.util.Collections.emptyList(); + stale_ = false; } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -17753,6 +17785,9 @@ public final class ClientProtos { for (int i = 0; i < results_.size(); i++) { output.writeMessage(5, results_.get(i)); } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeBool(6, stale_); + } getUnknownFields().writeTo(output); } @@ -17787,6 +17822,10 @@ public final class ClientProtos { size += com.google.protobuf.CodedOutputStream .computeMessageSize(5, results_.get(i)); } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + .computeBoolSize(6, stale_); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -17829,6 +17868,11 @@ public final class ClientProtos { } result = result && getResultsList() .equals(other.getResultsList()); + result = result && (hasStale() == other.hasStale()); + if (hasStale()) { + result = result && (getStale() + == other.getStale()); + } result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -17862,6 +17906,10 @@ public final class ClientProtos { hash = (37 * hash) + RESULTS_FIELD_NUMBER; hash = (53 * hash) + getResultsList().hashCode(); } + if (hasStale()) { + hash = (37 * hash) + STALE_FIELD_NUMBER; + hash = (53 * hash) + hashBoolean(getStale()); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -17992,6 +18040,8 @@ public final class ClientProtos { } else { resultsBuilder_.clear(); } + stale_ = false; + bitField0_ = (bitField0_ & ~0x00000020); return this; } @@ -18046,6 +18096,10 @@ public final class ClientProtos { } else { result.results_ = resultsBuilder_.build(); } + if (((from_bitField0_ & 0x00000020) 
== 0x00000020)) { + to_bitField0_ |= 0x00000008; + } + result.stale_ = stale_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -18107,6 +18161,9 @@ public final class ClientProtos { } } } + if (other.hasStale()) { + setStale(other.getStale()); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -18717,6 +18774,39 @@ public final class ClientProtos { return resultsBuilder_; } + // optional bool stale = 6; + private boolean stale_ ; + /** + * optional bool stale = 6; + */ + public boolean hasStale() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + /** + * optional bool stale = 6; + */ + public boolean getStale() { + return stale_; + } + /** + * optional bool stale = 6; + */ + public Builder setStale(boolean value) { + bitField0_ |= 0x00000020; + stale_ = value; + onChanged(); + return this; + } + /** + * optional bool stale = 6; + */ + public Builder clearStale() { + bitField0_ = (bitField0_ & ~0x00000020); + stale_ = false; + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:ScanResponse) } @@ -29520,46 +29610,46 @@ public final class ClientProtos { "Specifier\022\023\n\004scan\030\002 \001(\0132\005.Scan\022\022\n\nscanne", "r_id\030\003 \001(\004\022\026\n\016number_of_rows\030\004 \001(\r\022\025\n\rcl" + "ose_scanner\030\005 \001(\010\022\025\n\rnext_call_seq\030\006 \001(\004" + - "\"y\n\014ScanResponse\022\030\n\020cells_per_result\030\001 \003" + - "(\r\022\022\n\nscanner_id\030\002 \001(\004\022\024\n\014more_results\030\003" + - " \001(\010\022\013\n\003ttl\030\004 \001(\r\022\030\n\007results\030\005 \003(\0132\007.Res" + - "ult\"\263\001\n\024BulkLoadHFileRequest\022 \n\006region\030\001" + - " \002(\0132\020.RegionSpecifier\0225\n\013family_path\030\002 " + - "\003(\0132 .BulkLoadHFileRequest.FamilyPath\022\026\n" + - "\016assign_seq_num\030\003 \001(\010\032*\n\nFamilyPath\022\016\n\006f" + - "amily\030\001 \002(\014\022\014\n\004path\030\002 \002(\t\"\'\n\025BulkLoadHFi", - "leResponse\022\016\n\006loaded\030\001 \002(\010\"a\n\026Coprocesso" + - "rServiceCall\022\013\n\003row\030\001 \002(\014\022\024\n\014service_nam" + - "e\030\002 \002(\t\022\023\n\013method_name\030\003 \002(\t\022\017\n\007request\030" + - "\004 \002(\014\"d\n\031CoprocessorServiceRequest\022 \n\006re" + - "gion\030\001 \002(\0132\020.RegionSpecifier\022%\n\004call\030\002 \002" + - "(\0132\027.CoprocessorServiceCall\"]\n\032Coprocess" + - "orServiceResponse\022 \n\006region\030\001 \002(\0132\020.Regi" + - "onSpecifier\022\035\n\005value\030\002 \002(\0132\016.NameBytesPa" + - "ir\"L\n\006Action\022\r\n\005index\030\001 \001(\r\022 \n\010mutation\030" + - "\002 \001(\0132\016.MutationProto\022\021\n\003get\030\003 \001(\0132\004.Get", - "\"Y\n\014RegionAction\022 \n\006region\030\001 \002(\0132\020.Regio" + - "nSpecifier\022\016\n\006atomic\030\002 \001(\010\022\027\n\006action\030\003 \003" + - "(\0132\007.Action\"^\n\021ResultOrException\022\r\n\005inde" + - "x\030\001 \001(\r\022\027\n\006result\030\002 \001(\0132\007.Result\022!\n\texce" + - "ption\030\003 \001(\0132\016.NameBytesPair\"f\n\022RegionAct" + - "ionResult\022-\n\021resultOrException\030\001 \003(\0132\022.R" + - "esultOrException\022!\n\texception\030\002 \001(\0132\016.Na" + - "meBytesPair\"G\n\014MultiRequest\022#\n\014regionAct" + - "ion\030\001 \003(\0132\r.RegionAction\022\022\n\nnonceGroup\030\002" + - " \001(\004\"@\n\rMultiResponse\022/\n\022regionActionRes", - "ult\030\001 \003(\0132\023.RegionActionResult*\'\n\013Consis" + - 
"tency\022\n\n\006STRONG\020\000\022\014\n\010TIMELINE\020\0012\261\002\n\rClie" + - "ntService\022 \n\003Get\022\013.GetRequest\032\014.GetRespo" + - "nse\022)\n\006Mutate\022\016.MutateRequest\032\017.MutateRe" + - "sponse\022#\n\004Scan\022\014.ScanRequest\032\r.ScanRespo" + - "nse\022>\n\rBulkLoadHFile\022\025.BulkLoadHFileRequ" + - "est\032\026.BulkLoadHFileResponse\022F\n\013ExecServi" + - "ce\022\032.CoprocessorServiceRequest\032\033.Coproce" + - "ssorServiceResponse\022&\n\005Multi\022\r.MultiRequ" + - "est\032\016.MultiResponseBB\n*org.apache.hadoop", - ".hbase.protobuf.generatedB\014ClientProtosH" + - "\001\210\001\001\240\001\001" + "\"\210\001\n\014ScanResponse\022\030\n\020cells_per_result\030\001 " + + "\003(\r\022\022\n\nscanner_id\030\002 \001(\004\022\024\n\014more_results\030" + + "\003 \001(\010\022\013\n\003ttl\030\004 \001(\r\022\030\n\007results\030\005 \003(\0132\007.Re" + + "sult\022\r\n\005stale\030\006 \001(\010\"\263\001\n\024BulkLoadHFileReq" + + "uest\022 \n\006region\030\001 \002(\0132\020.RegionSpecifier\0225" + + "\n\013family_path\030\002 \003(\0132 .BulkLoadHFileReque" + + "st.FamilyPath\022\026\n\016assign_seq_num\030\003 \001(\010\032*\n" + + "\nFamilyPath\022\016\n\006family\030\001 \002(\014\022\014\n\004path\030\002 \002(", + "\t\"\'\n\025BulkLoadHFileResponse\022\016\n\006loaded\030\001 \002" + + "(\010\"a\n\026CoprocessorServiceCall\022\013\n\003row\030\001 \002(" + + "\014\022\024\n\014service_name\030\002 \002(\t\022\023\n\013method_name\030\003" + + " \002(\t\022\017\n\007request\030\004 \002(\014\"d\n\031CoprocessorServ" + + "iceRequest\022 \n\006region\030\001 \002(\0132\020.RegionSpeci" + + "fier\022%\n\004call\030\002 \002(\0132\027.CoprocessorServiceC" + + "all\"]\n\032CoprocessorServiceResponse\022 \n\006reg" + + "ion\030\001 \002(\0132\020.RegionSpecifier\022\035\n\005value\030\002 \002" + + "(\0132\016.NameBytesPair\"L\n\006Action\022\r\n\005index\030\001 " + + "\001(\r\022 \n\010mutation\030\002 \001(\0132\016.MutationProto\022\021\n", + "\003get\030\003 \001(\0132\004.Get\"Y\n\014RegionAction\022 \n\006regi" + + "on\030\001 \002(\0132\020.RegionSpecifier\022\016\n\006atomic\030\002 \001" + + "(\010\022\027\n\006action\030\003 \003(\0132\007.Action\"^\n\021ResultOrE" + + "xception\022\r\n\005index\030\001 \001(\r\022\027\n\006result\030\002 \001(\0132" + + "\007.Result\022!\n\texception\030\003 \001(\0132\016.NameBytesP" + + "air\"f\n\022RegionActionResult\022-\n\021resultOrExc" + + "eption\030\001 \003(\0132\022.ResultOrException\022!\n\texce" + + "ption\030\002 \001(\0132\016.NameBytesPair\"G\n\014MultiRequ" + + "est\022#\n\014regionAction\030\001 \003(\0132\r.RegionAction" + + "\022\022\n\nnonceGroup\030\002 \001(\004\"@\n\rMultiResponse\022/\n", + "\022regionActionResult\030\001 \003(\0132\023.RegionAction" + + "Result*\'\n\013Consistency\022\n\n\006STRONG\020\000\022\014\n\010TIM" + + "ELINE\020\0012\261\002\n\rClientService\022 \n\003Get\022\013.GetRe" + + "quest\032\014.GetResponse\022)\n\006Mutate\022\016.MutateRe" + + "quest\032\017.MutateResponse\022#\n\004Scan\022\014.ScanReq" + + "uest\032\r.ScanResponse\022>\n\rBulkLoadHFile\022\025.B" + + "ulkLoadHFileRequest\032\026.BulkLoadHFileRespo" + + "nse\022F\n\013ExecService\022\032.CoprocessorServiceR" + + "equest\032\033.CoprocessorServiceResponse\022&\n\005M" + + "ulti\022\r.MultiRequest\032\016.MultiResponseBB\n*o", + "rg.apache.hadoop.hbase.protobuf.generate" + + "dB\014ClientProtosH\001\210\001\001\240\001\001" }; 
  com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner =
      new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() {
@@ -29661,7 +29751,7 @@ public final class ClientProtos {
           internal_static_ScanResponse_fieldAccessorTable = new
             com.google.protobuf.GeneratedMessage.FieldAccessorTable(
               internal_static_ScanResponse_descriptor,
-              new java.lang.String[] { "CellsPerResult", "ScannerId", "MoreResults", "Ttl", "Results", });
+              new java.lang.String[] { "CellsPerResult", "ScannerId", "MoreResults", "Ttl", "Results", "Stale", });
           internal_static_BulkLoadHFileRequest_descriptor =
             getDescriptor().getMessageTypes().get(14);
           internal_static_BulkLoadHFileRequest_fieldAccessorTable = new
diff --git a/hbase-protocol/src/main/protobuf/Client.proto b/hbase-protocol/src/main/protobuf/Client.proto
index 2529c0c..24ab2f7 100644
--- a/hbase-protocol/src/main/protobuf/Client.proto
+++ b/hbase-protocol/src/main/protobuf/Client.proto
@@ -289,6 +289,7 @@ message ScanResponse {
   // This field is mutually exclusive with cells_per_result (since the Cells will
   // be inside the pb'd Result)
   repeated Result results = 5;
+  optional bool stale = 6;
 }

 /**
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
index ebf1325..41a761b 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
@@ -211,7 +211,6 @@ import org.apache.hadoop.hbase.util.FSUtils;
 import org.apache.hadoop.hbase.util.InfoServer;
 import org.apache.hadoop.hbase.util.JvmPauseMonitor;
 import org.apache.hadoop.hbase.util.Pair;
-import org.apache.hadoop.hbase.util.ServerRegionReplicaUtil;
 import org.apache.hadoop.hbase.util.Sleeper;
 import org.apache.hadoop.hbase.util.Strings;
 import org.apache.hadoop.hbase.util.Threads;
@@ -3152,6 +3151,7 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa
       try {
         int i = 0;
         synchronized(scanner) {
+          boolean stale = (region.getRegionInfo().getReplicaId() != 0);
           for (; i < rows && currentScanResultSize < maxResultSize; ) {
             // Collect values to be returned here
@@ -3162,7 +3162,7 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa
                 currentScanResultSize += KeyValueUtil.ensureKeyValue(kv).heapSize();
               }
             }
-            results.add(Result.create(values));
+            results.add(Result.create(values, null, stale));
             i++;
           }
           if (!moreRows) {
@@ -3189,7 +3189,7 @@
             moreResults = false;
             results = null;
           } else {
-            addResults(builder, results, controller);
+            addResults(builder, results, controller, region.getRegionInfo().getReplicaId() == 0);
           }
         } finally {
           // We're done. On way out re-add the above removed lease.
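With the server marking anything served by a non-default replica (replica id != 0) as stale, and the ResponseConverter change earlier in this patch propagating the flag into each Result, a client can opt into replica reads and inspect what it got back. A minimal client-side sketch, assuming a table whose regions have replicas (the table handle and the handling branch are placeholders, not from the patch):

    Scan scan = new Scan();
    scan.setConsistency(Consistency.TIMELINE); // allow the read to be served by a secondary
    ResultScanner scanner = table.getScanner(scan);
    try {
      for (Result r : scanner) {
        if (r.isStale()) {
          // The row came from a replica and may lag the primary.
        }
      }
    } finally {
      scanner.close();
    }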
@@ -3235,8 +3235,9 @@ public class HRegionServer implements ClientProtos.ClientService.BlockingInterfa
   }

   private void addResults(final ScanResponse.Builder builder, final List<Result> results,
-      final RpcController controller) {
+      final RpcController controller, boolean isDefaultRegion) {
     if (results == null || results.isEmpty()) return;
+    builder.setStale(!isDefaultRegion);
     if (isClientCellBlockSupport()) {
       for (Result res : results) {
         builder.addCellsPerResult(res.size());
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java
index 5f45be3..c04edc1 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/TestMultiVersions.java
@@ -241,7 +241,7 @@ public class TestMultiVersions {
       }
       assertTrue(cellCount == 1);
     }
-    table.close();
+    table.flushCommits();
   }

   // Case 1: scan with LATEST_TIMESTAMP. Should get two rows
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java
index 6ae0ecd..ea2324c 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/catalog/TestMetaReaderEditorNoCluster.java
@@ -33,6 +33,7 @@ import org.apache.hadoop.hbase.Abortable;
 import org.apache.hadoop.hbase.Cell;
 import org.apache.hadoop.hbase.CellScannable;
 import org.apache.hadoop.hbase.CellUtil;
+import org.apache.hadoop.hbase.RegionLocations;
 import org.apache.hadoop.hbase.TableName;
 import org.apache.hadoop.hbase.HBaseTestingUtility;
 import org.apache.hadoop.hbase.HConstants;
@@ -41,6 +42,7 @@ import org.apache.hadoop.hbase.HRegionLocation;
 import org.apache.hadoop.hbase.KeyValue;
 import org.apache.hadoop.hbase.MediumTests;
 import org.apache.hadoop.hbase.ServerName;
+import org.apache.hadoop.hbase.client.ClusterConnection;
 import org.apache.hadoop.hbase.client.HConnection;
 import org.apache.hadoop.hbase.client.HConnectionManager;
 import org.apache.hadoop.hbase.client.HConnectionTestingUtility;
@@ -186,19 +188,20 @@
       // to shove this in here first so it gets picked up all over; e.g. by
       // HTable.
       connection = HConnectionTestingUtility.getSpiedConnection(UTIL.getConfiguration());
+
       // Fix the location lookup so it 'works' though no network. First
       // make an 'any location' object.
       final HRegionLocation anyLocation =
         new HRegionLocation(HRegionInfo.FIRST_META_REGIONINFO, sn);
-      // Return the any location object when locateRegion is called in HTable
-      // constructor and when its called by ServerCallable (it uses getRegionLocation).
+      final RegionLocations rl = new RegionLocations(anyLocation);
+      // Return the RegionLocations object when locateRegion is called.
       // The ugly format below comes of 'Important gotcha on spying real objects!' from
       // http://mockito.googlecode.com/svn/branches/1.6/javadoc/org/mockito/Mockito.html
-      Mockito.doReturn(anyLocation).
-        when(connection).locateRegion((TableName) Mockito.any(), (byte[]) Mockito.any());
-      Mockito.doReturn(anyLocation).
-        when(connection).getRegionLocation((TableName) Mockito.any(),
-          (byte[]) Mockito.any(), Mockito.anyBoolean());
+      ClusterConnection cConnection =
+        HConnectionTestingUtility.getSpiedClusterConnection(UTIL.getConfiguration());
+      Mockito.doReturn(rl).when
+        (cConnection).locateRegion((TableName)Mockito.any(), (byte[])Mockito.any(),
+          Mockito.anyBoolean(), Mockito.anyBoolean(), Mockito.anyInt());

       // Now shove our HRI implementation into the spied-upon connection.
       Mockito.doReturn(implementation).
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
index 490bd7f..d356896 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/HConnectionTestingUtility.java
@@ -154,6 +154,20 @@ public class HConnectionTestingUtility {
     }
   }

+  public static ClusterConnection getSpiedClusterConnection(final Configuration conf)
+      throws IOException {
+    HConnectionKey connectionKey = new HConnectionKey(conf);
+    synchronized (ConnectionManager.CONNECTION_INSTANCES) {
+      HConnectionImplementation connection =
+        ConnectionManager.CONNECTION_INSTANCES.get(connectionKey);
+      if (connection == null) {
+        connection = Mockito.spy(new HConnectionImplementation(conf, true));
+        ConnectionManager.CONNECTION_INSTANCES.put(connectionKey, connection);
+      }
+      return connection;
+    }
+  }
+
   /**
    * @return Count of extant connection instances
    */
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestReplicasClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestReplicasClient.java
index f8d7cf5..96bcdf5 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestReplicasClient.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestReplicasClient.java
@@ -38,7 +38,10 @@ import org.apache.hadoop.hbase.coprocessor.RegionCoprocessorEnvironment;
 import org.apache.hadoop.hbase.protobuf.RequestConverter;
 import org.apache.hadoop.hbase.protobuf.generated.AdminProtos;
 import org.apache.hadoop.hbase.regionserver.HRegionServer;
+import org.apache.hadoop.hbase.regionserver.InternalScanner;
+import org.apache.hadoop.hbase.regionserver.RegionScanner;
 import org.apache.hadoop.hbase.regionserver.StorefileRefresherChore;
+import org.apache.hadoop.hbase.util.Bytes;
 import org.apache.hadoop.hbase.zookeeper.ZKAssign;
 import org.apache.zookeeper.KeeperException;
 import org.junit.After;
@@ -50,9 +53,13 @@ import org.junit.Test;
 import org.junit.experimental.categories.Category;

 import java.io.IOException;
+import java.util.HashMap;
+import java.util.Iterator;
 import java.util.List;
 import java.util.concurrent.CountDownLatch;
 import java.util.concurrent.TimeUnit;
+import java.util.concurrent.atomic.AtomicBoolean;
+import java.util.concurrent.atomic.AtomicInteger;
 import java.util.concurrent.atomic.AtomicLong;
 import java.util.concurrent.atomic.AtomicReference;
@@ -81,6 +88,8 @@ public class TestReplicasClient {
    */
   public static class SlowMeCopro extends BaseRegionObserver {
     static final AtomicLong sleepTime = new AtomicLong(0);
+    static final AtomicBoolean slowDownNext = new AtomicBoolean(false);
+    static final AtomicInteger countOfNext = new AtomicInteger(0);
     static final AtomicReference<CountDownLatch> cdl =
         new AtomicReference<CountDownLatch>(new CountDownLatch(0));
@@ -90,7 +99,32 @@ public class TestReplicasClient {
     @Override
     public void preGetOp(final ObserverContext<RegionCoprocessorEnvironment>
         e, final Get get, final List<Cell> results) throws IOException {
+      slowdownCode(e);
+    }
+
+    @Override
+    public RegionScanner preScannerOpen(final ObserverContext<RegionCoprocessorEnvironment> e,
+        final Scan scan, final RegionScanner s) throws IOException {
+      slowdownCode(e);
+      return s;
+    }
+
+    @Override
+    public boolean preScannerNext(final ObserverContext<RegionCoprocessorEnvironment> e,
+        final InternalScanner s, final List<Result> results,
+        final int limit, final boolean hasMore) throws IOException {
+      // This will slow down a certain next operation if the conditions are met. The slowness
+      // will allow the call to go to a replica.
+      if (slowDownNext.get()) {
+        if (countOfNext.incrementAndGet() == 2) {
+          sleepTime.set(5000);
+          slowdownCode(e);
+        }
+      }
+      return true;
+    }
+
+    private void slowdownCode(final ObserverContext<RegionCoprocessorEnvironment> e) {
       if (e.getEnvironment().getRegion().getRegionInfo().getReplicaId() == 0) {
         CountDownLatch latch = cdl.get();
         try {
@@ -118,7 +152,7 @@
     // enable store file refreshing
     HTU.getConfiguration().setInt(
         StorefileRefresherChore.REGIONSERVER_STOREFILE_REFRESH_PERIOD, REFRESH_PERIOD);
-
+    HTU.getConfiguration().setBoolean("hbase.client.log.scanner.activity", true);
     HTU.startMiniCluster(NB_SERVERS);

     // Create table then get the single region for our new table.
@@ -158,6 +192,14 @@
   @Before
   public void before() throws IOException {
     HTU.getHBaseAdmin().getConnection().clearRegionCache();
+    try {
+      openRegion(hriPrimary);
+    } catch (Exception ignored) {
+    }
+    try {
+      openRegion(hriSecondary);
+    } catch (Exception ignored) {
+    }
   }

   @After
@@ -166,6 +208,10 @@
       closeRegion(hriSecondary);
     } catch (Exception ignored) {
     }
+    try {
+      closeRegion(hriPrimary);
+    } catch (Exception ignored) {
+    }

     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriPrimary);
     ZKAssign.deleteNodeFailSilent(HTU.getZooKeeperWatcher(), hriSecondary);
@@ -177,6 +223,9 @@
   }

   private void openRegion(HRegionInfo hri) throws Exception {
+    try {
+      if (isRegionOpened(hri)) return;
+    } catch (Exception e) {}
     ZKAssign.createNodeOffline(HTU.getZooKeeperWatcher(), hri, getRS().getServerName());
     // first version is '0'
     AdminProtos.OpenRegionRequest orr =
@@ -211,6 +260,10 @@
       ZKAssign.deleteOpenedNode(HTU.getZooKeeperWatcher(), hri.getEncodedName(), null));
   }

+  private boolean isRegionOpened(HRegionInfo hri) throws Exception {
+    return getRS().getRegionByEncodedName(hri.getEncodedName()).isAvailable();
+  }
+
   private void checkRegionIsClosed(String encodedRegionName) throws Exception {

     while (!getRS().getRegionsInTransitionInRS().isEmpty()) {
@@ -464,4 +517,104 @@
       closeRegion(hriSecondary);
     }
   }
+
+  @Test
+  public void testScanWithReplicas() throws Exception {
+    // simple scan
+    runMultipleScansOfOneType(false, false);
+  }
+
+  @Test
+  public void testSmallScanWithReplicas() throws Exception {
+    // small scan
+    runMultipleScansOfOneType(false, true);
+  }
+
+  @Test
+  public void testReverseScanWithReplicas() throws Exception {
+    // reverse scan
+    runMultipleScansOfOneType(true, false);
+  }
+
+  private void runMultipleScansOfOneType(boolean reversed, boolean small) throws Exception {
+    openRegion(hriSecondary);
+    int NUMROWS = 100;
+    try {
+      for (int i = 0; i < NUMROWS; i++) {
+        byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i);
+        Put p = new Put(b1);
+        p.add(f, b1, b1);
+        table.put(p);
+      }
+      LOG.debug("PUT done");
+      int caching = 20;
+      byte[] start;
+      if (reversed) start =
Bytes.toBytes("testUseRegionWithReplica" + (NUMROWS - 1)); + else start = Bytes.toBytes("testUseRegionWithReplica" + 0); + + scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, start, NUMROWS, false, false); + + //Even if we were to slow the server down, unless we ask for stale + //we won't get it + SlowMeCopro.sleepTime.set(5000); + scanWithReplicas(reversed, small, Consistency.STRONG, caching, start, NUMROWS, false, false); + SlowMeCopro.sleepTime.set(0); + + HTU.getHBaseAdmin().flush(table.getTableName()); + LOG.info("flush done"); + Thread.sleep(1000 + REFRESH_PERIOD * 2); + + //Now set the flag to get a response even if stale + SlowMeCopro.sleepTime.set(5000); + scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, start, NUMROWS, true, false); + SlowMeCopro.sleepTime.set(0); + + // now make some 'next' calls slow + SlowMeCopro.slowDownNext.set(true); + SlowMeCopro.countOfNext.set(0); + scanWithReplicas(reversed, small, Consistency.TIMELINE, caching, start, NUMROWS, true, true); + SlowMeCopro.slowDownNext.set(false); + SlowMeCopro.countOfNext.set(0); + } finally { + SlowMeCopro.cdl.get().countDown(); + SlowMeCopro.sleepTime.set(0); + SlowMeCopro.slowDownNext.set(false); + SlowMeCopro.countOfNext.set(0); + for (int i = 0; i < NUMROWS; i++) { + byte[] b1 = Bytes.toBytes("testUseRegionWithReplica" + i); + Delete d = new Delete(b1); + table.delete(d); + } + closeRegion(hriSecondary); + } + } + + private void scanWithReplicas(boolean reversed, boolean small, Consistency consistency, + int caching, byte[] startRow, int numRows, boolean staleExpected, boolean slowNext) + throws Exception { + Scan scan = new Scan(startRow); + scan.setCaching(caching); + scan.setReversed(reversed); + scan.setSmall(small); + scan.setConsistency(consistency); + ResultScanner scanner = table.getScanner(scan); + Iterator iter = scanner.iterator(); + int count = 0; + HashMap map = new HashMap(); + int countOfStale = 0; + while (iter.hasNext()) { + count++; + Result r = iter.next(); + if (map.containsKey(r.getRow())) { + throw new Exception("Unexpected scan result. 
+            + "Repeated row " + Bytes.toString(r.getRow()));
+      }
+      map.put(Bytes.toString(r.getRow()), true);
+      if (!slowNext) Assert.assertTrue(r.isStale() == staleExpected);
+      if (r.isStale()) countOfStale++;
+    }
+    Assert.assertTrue(count == numRows);
+    if (slowNext) {
+      // Only the slowed 'next' batch should have been served by a replica:
+      // more than one stale row, but fewer than all of them.
+      Assert.assertTrue(countOfStale > 1 && countOfStale < numRows);
+    }
+  }
 }
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java
index fa59b6d..4cdc815 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/client/TestRestoreSnapshotFromClient.java
@@ -196,7 +196,6 @@ public class TestRestoreSnapshotFromClient {
     assertEquals(500, TEST_UTIL.countRows(table, TEST_FAMILY2));
     Set fsFamilies = getFamiliesFromFS(tableName);
     assertEquals(2, fsFamilies.size());
-    table.close();

     // Take a snapshot
     admin.disableTable(tableName);
@@ -217,7 +216,6 @@
     assertEquals(1, htd.getFamilies().size());
     fsFamilies = getFamiliesFromFS(tableName);
     assertEquals(1, fsFamilies.size());
-    table.close();

     // Restore back the snapshot (with the cf)
     admin.disableTable(tableName);
diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/visibility/TestVisibilityLabels.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/visibility/TestVisibilityLabels.java
index 45671bc..6ff9991 100644
--- a/hbase-server/src/test/java/org/apache/hadoop/hbase/security/visibility/TestVisibilityLabels.java
+++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/security/visibility/TestVisibilityLabels.java
@@ -781,7 +781,7 @@ public class TestVisibilityLabels {
       table.put(puts);
     } finally {
       if (table != null) {
-        table.close();
+        table.flushCommits();
       }
     }
     return table;
diff --git a/pom.xml.hadoop2 b/pom.xml.hadoop2
index 25c0079..e56d1b5 100644
--- a/pom.xml.hadoop2
+++ b/pom.xml.hadoop2
@@ -889,7 +889,7 @@
     2.4
     2.6
     1.1.1
-    2.2
+    2.1
     3.2.1
     3.1
     2.1.2
@@ -1443,7 +1443,7 @@
         org.apache.maven.plugins
         maven-surefire-plugin
-        -enableassertions -Xmx1900m -Djava.security.egd=file:/dev/./urandom -Djava.net.preferIPv4Stack=true "-Djava.library.path=${hadoop.library.path};${java.library.path}"
+        -enableassertions -Xmx1900m -XX:MaxPermSize=256m -Djava.security.egd=file:/dev/./urandom -Djava.net.preferIPv4Stack=true "-Djava.library.path=${hadoop.library.path};${java.library.path}"
         java.net.preferIPv4Stack
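For comparison, the same consistency knob applies to point reads, which gained replica support in the earlier patches this change builds on. A hedged sketch, assuming Get exposes setConsistency the same way Scan does in this branch (row key reused from the test above):

    Get get = new Get(Bytes.toBytes("testUseRegionWithReplica0"));
    get.setConsistency(Consistency.TIMELINE);
    Result r = table.get(get);
    if (r.isStale()) {
      // served by a secondary replica; the value may lag the primary
    }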