Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (revision 1461042) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (working copy) @@ -21,10 +21,16 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.State; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import com.google.protobuf.InvalidProtocolBufferException; + import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -58,6 +64,11 @@ */ private final Map cache = new HashMap(); + private byte[] data; + + private final static Table.State createTableStates[] = { Table.State.CREATING, Table.State.CREATING_TD, + Table.State.CREATING_REGIONINFO, Table.State.MOVING_TO_ORIG_LOCATION, + Table.State.ADDING_TO_META }; // TODO: Make it so always a table znode. Put table schema here as well as table state. // Have watcher on table znode so all are notified of state or schema change. 
@@ -79,6 +90,12 @@ if (children == null) return; for (String child: children) { ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(this.watcher, child); + if(state == ZooKeeperProtos.Table.State.CREATING){ + List listChildrenNoWatch = ZKUtil.listChildrenNoWatch(this.watcher, this.watcher.tableZNode+"/"+child); + for (String string : listChildrenNoWatch) { + LOG.debug("Status node should be present "+string); + } + } if (state != null) this.cache.put(child, state); } } @@ -134,7 +151,72 @@ setTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } } + + public void createTableCreationStatusNode(final String tableName) throws KeeperException{ + String tableStatusZNode = pathOfTableStatusZNode(tableName); + if (ZKUtil.checkExists(this.watcher, tableStatusZNode) == -1) { + ZKUtil.createAndFailSilent(this.watcher, tableStatusZNode); + } + String tableZNode = ZKUtil.joinZNode(this.watcher.tableZNode, tableName); + int numberOfChildren = ZKUtil.getNumberOfChildren(this.watcher, tableZNode); + LOG.debug("Number of children under "+tableZNode+": "+numberOfChildren); + } + + public void cleanUpTableStatusNode(final String tableName) throws KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + ZKUtil.deleteNodeFailSilent(this.watcher, tableStatusZNode); + } + + + /** + * If the table is found in CREATING_TABLE state the in-memory state is + * removed. 
This helps in cases where CreateTable is to be retried by the + * client incase of failures + * + * @param tableName + */ + public void removeCreateTableStates(final String tableName) { + synchronized (this.cache) { + if (isTableCreationState(tableName)) { + this.cache.remove(tableName); + } + } + } + + public static Set getCreatingTables(ZooKeeperWatcher zkw) throws KeeperException { + return getAllTables(zkw, createTableStates); + } + + + public void setStatusCreatingTableDescriptor(final String tableName) throws KeeperException { + setTableState(tableName, Table.State.CREATING_TD); + } + + public void setCurrentCreateTableStatus(final String tableName, final Table.State state) + throws KeeperException { + setTableState(tableName, state); + } + + public void setStatusCreatingRegionInfo(final String tableName) throws NoNodeException, + KeeperException { + setTableState(tableName, Table.State.CREATING_REGIONINFO); + } + + public void setStatusMovingToOriginalLocation(final String tableName) throws KeeperException { + setTableState(tableName, Table.State.MOVING_TO_ORIG_LOCATION); + } + + public void setStatusToAddingToMeta(final String tableName) throws KeeperException { + setTableState(tableName, Table.State.ADDING_TO_META); + } + + private String pathOfTableStatusZNode(final String tableName) { + String tableZNode = ZKUtil.joinZNode(this.watcher.tableZNode, tableName); + String tableStatusZNode = ZKUtil.joinZNode(tableZNode, this.watcher.tableCreationStatusNode); + return tableStatusZNode; + } + /** * Sets the specified table as ENABLING in zookeeper atomically * If the table is already in ENABLING state, no operation is performed @@ -152,7 +234,36 @@ return true; } } + + /** + * Sets the specified table in CREATING Table states in zookeeper atomically + * If the table is already in CREATING tables state, no operation is performed + * @param tableName + * @return if the operation succeeds or not + * @throws KeeperException unexpected zookeeper exception + */ + 
public boolean checkAndSetCreatingTableStates(final String tableName) throws KeeperException { + synchronized (this.cache) { + if (isTableCreationState(tableName)) { + return false; + } + setTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + return true; + } + } + + public Table.State getCurrentTableCreationState(String tableName) throws KeeperException { + return ZKTableReadOnly.getTableState(this.watcher, tableName); + } + private boolean isZKInTableCreationState(String tableName) throws KeeperException { + Set creatingTables = getCreatingTables(this.watcher); + if (creatingTables.contains(tableName)) { + return true; + } + return false; + } + /** * Sets the specified table as ENABLING in zookeeper atomically * If the table isn't in DISABLED state, no operation is performed @@ -215,7 +326,22 @@ public boolean isEnablingTable(final String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } + + public boolean isTableCreationState (final String tableName){ + return isTableState(tableName, this.createTableStates); + } + + private boolean isTableState(String tableName, State[] createTableStates) { + synchronized (this.cache) { + ZooKeeperProtos.Table.State currentState = this.cache.get(tableName); + return ZKTableReadOnly.isTableState(currentState, createTableStates); + } + } + public boolean isCreatingTable(final String tableName) { + return isTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + } + public boolean isEnabledTable(String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLED); } @@ -273,6 +399,8 @@ public void setEnabledTable(final String tableName) throws KeeperException { setTableState(tableName, ZooKeeperProtos.Table.State.ENABLED); } + + /** * check if table is present . 
@@ -374,6 +502,7 @@ ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(zkw, child); for (ZooKeeperProtos.Table.State expectedState: states) { if (state == expectedState) { + LOG.debug("The table "+child+" found in "+state+" state"); allTables.add(child); break; } @@ -381,4 +510,5 @@ } return allTables; } + } Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java (revision 1461042) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java (working copy) @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.State; import org.apache.zookeeper.KeeperException; import java.util.HashSet; @@ -156,4 +157,18 @@ throw ZKUtil.convert(e); } } + + public static boolean isTableState(State currentState) { + return false; + } + + public static boolean isTableState(State currentState, State[] createTableStates) { + for (State state : createTableStates) { + if (currentState != null) { + if (state.equals(currentState)) + return true; + } + } + return false; + } } Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 1461042) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy) @@ -105,6 +105,8 @@ public String balancerZNode; // znode containing the lock for the tables public String tableLockZNode; + // znode that has the current state of the create table + public String 
tableCreationStatusNode = "status"; // Certain ZooKeeper nodes need to be world-readable public static final ArrayList CREATOR_ALL_AND_WORLD_READABLE = @@ -220,6 +222,9 @@ conf.get("zookeeper.znode.balancer", "balancer")); tableLockZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.tableLock", "table-lock")); + + tableCreationStatusNode = + conf.get("zookeeper.znode.tableStatus", "status"); } /** Index: hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1461042) +++ hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -324,6 +324,9 @@ /** Default value for cluster ID */ public static final String CLUSTER_ID_DEFAULT = "default-cluster"; + + /** Name of the file storing a table's split keys */ + public static final String SPLIT_KEYS_FILE = "splitKeys"; // Always store the location of the root table's HRegion. // This HRegion is never split. 
Index: hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java =================================================================== --- hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (revision 1461042) +++ hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (working copy) @@ -2812,12 +2812,22 @@ DISABLED(1, 1), DISABLING(2, 2), ENABLING(3, 3), + CREATING(4, 4), + CREATING_TD(5, 5), + CREATING_REGIONINFO(6, 6), + MOVING_TO_ORIG_LOCATION(7, 7), + ADDING_TO_META(8, 8), ; public static final int ENABLED_VALUE = 0; public static final int DISABLED_VALUE = 1; public static final int DISABLING_VALUE = 2; public static final int ENABLING_VALUE = 3; + public static final int CREATING_VALUE = 4; + public static final int CREATING_TD_VALUE = 5; + public static final int CREATING_REGIONINFO_VALUE = 6; + public static final int MOVING_TO_ORIG_LOCATION_VALUE = 7; + public static final int ADDING_TO_META_VALUE = 8; public final int getNumber() { return value; } @@ -2828,6 +2838,11 @@ case 1: return DISABLED; case 2: return DISABLING; case 3: return ENABLING; + case 4: return CREATING; + case 5: return CREATING_TD; + case 6: return CREATING_REGIONINFO; + case 7: return MOVING_TO_ORIG_LOCATION; + case 8: return ADDING_TO_META; default: return null; } } @@ -2858,7 +2873,7 @@ } private static final State[] VALUES = { - ENABLED, DISABLED, DISABLING, ENABLING, + ENABLED, DISABLED, DISABLING, ENABLING, CREATING, CREATING_TD, CREATING_REGIONINFO, MOVING_TO_ORIG_LOCATION, ADDING_TO_META, }; public static State valueOf( @@ -5750,19 +5765,22 @@ "plitLogTask.State\022\037\n\nserverName\030\002 \002(\0132\013." 
+ "ServerName\"C\n\005State\022\016\n\nUNASSIGNED\020\000\022\t\n\005O", "WNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n\004DONE\020\003\022\007\n\003ERR\020\004\"" + - "n\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007E" + - "NABLED\"?\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED" + - "\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\"%\n\017Repli" + - "cationPeer\022\022\n\nclusterkey\030\001 \002(\t\"^\n\020Replic" + - "ationState\022&\n\005state\030\001 \002(\0162\027.ReplicationS" + - "tate.State\"\"\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISA" + - "BLED\020\001\"+\n\027ReplicationHLogPosition\022\020\n\010pos" + - "ition\030\001 \002(\003\"$\n\017ReplicationLock\022\021\n\tlockOw" + - "ner\030\001 \002(\t\"s\n\tTableLock\022\021\n\ttableName\030\001 \001(", - "\014\022\036\n\tlockOwner\030\002 \001(\0132\013.ServerName\022\020\n\010thr" + - "eadId\030\003 \001(\003\022\020\n\010isShared\030\004 \001(\010\022\017\n\007purpose" + - "\030\005 \001(\tBE\n*org.apache.hadoop.hbase.protob" + - "uf.generatedB\017ZooKeeperProtosH\001\210\001\001\240\001\001" + "\330\001\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007" + + "ENABLED\"\250\001\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABL" + + "ED\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\022\014\n\010CRE" + + "ATING\020\004\022\017\n\013CREATING_TD\020\005\022\027\n\023CREATING_REG" + + "IONINFO\020\006\022\033\n\027MOVING_TO_ORIG_LOCATION\020\007\022\022" + + "\n\016ADDING_TO_META\020\010\"%\n\017ReplicationPeer\022\022\n" + + "\nclusterkey\030\001 \002(\t\"^\n\020ReplicationState\022&\n" + + "\005state\030\001 \002(\0162\027.ReplicationState.State\"\"\n" + + "\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\"+\n\027Rep", + "licationHLogPosition\022\020\n\010position\030\001 \002(\003\"$" + + "\n\017ReplicationLock\022\021\n\tlockOwner\030\001 \002(\t\"s\n\t" + + 
"TableLock\022\021\n\ttableName\030\001 \001(\014\022\036\n\tlockOwne" + + "r\030\002 \001(\0132\013.ServerName\022\020\n\010threadId\030\003 \001(\003\022\020" + + "\n\010isShared\030\004 \001(\010\022\017\n\007purpose\030\005 \001(\tBE\n*org" + + ".apache.hadoop.hbase.protobuf.generatedB" + + "\017ZooKeeperProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { Index: hbase-protocol/src/main/protobuf/ZooKeeper.proto =================================================================== --- hbase-protocol/src/main/protobuf/ZooKeeper.proto (revision 1461042) +++ hbase-protocol/src/main/protobuf/ZooKeeper.proto (working copy) @@ -93,6 +93,11 @@ DISABLED = 1; DISABLING = 2; ENABLING = 3; + CREATING = 4; + CREATING_TD = 5; + CREATING_REGIONINFO = 6; + MOVING_TO_ORIG_LOCATION = 7; + ADDING_TO_META = 8; } // This is the table's state. If no znode for a table, // its state is presumed enabled. 
See o.a.h.h.zookeeper.ZKTable class Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; @@ -54,15 +55,18 @@ import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.exceptions.NotServingRegionException; import org.apache.hadoop.hbase.exceptions.RegionAlreadyInTransitionException; import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException; import org.apache.hadoop.hbase.exceptions.ServerNotRunningYetException; +import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.exceptions.TableNotFoundException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; +import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.MergedRegionHandler; @@ -187,6 +191,16 @@ private final boolean tomActivated; /** + * A map to track the count a region fails to open in a row. + * So that we don't try to open a region forever if the failure is + * unrecoverable. 
We don't put this information in region states + * because we don't expect this to happen frequently; we don't + * want to copy this information over during each state transition either. + */ + private final ConcurrentHashMap + failedOpenTracker = new ConcurrentHashMap(); + + /** * Constructs a new assignment manager. * * @param server @@ -369,7 +383,9 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions - Map> deadServers = rebuildUserRegions(); + Pair>, Map>> recoveryInfo + = rebuildUserRegions(); + Map> deadServers = recoveryInfo.getFirst(); // This method will assign all user regions if a clean server startup or // it will reconstruct master state and cleanup any leftovers from @@ -378,6 +394,8 @@ recoverTableInDisablingState(); recoverTableInEnablingState(); + + recoverTableInCreation(recoveryInfo.getSecond()); } /** @@ -880,9 +898,25 @@ // When there are more than one region server a new RS is selected as the // destination and the same is updated in the regionplan. 
(HBASE-5546) if (regionState != null) { - getRegionPlan(regionState.getRegion(), sn, true); - this.executorService.submit(new ClosedRegionHandler(server, - this, regionState.getRegion())); + AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName); + if (failedOpenCount == null) { + failedOpenCount = new AtomicInteger(); + // No need to use putIfAbsent, or extra synchronization since + // this whole handleRegion block is locked on the encoded region + // name, and failedOpenTracker is updated only in this block + failedOpenTracker.put(encodedName, failedOpenCount); + } + if (failedOpenCount.incrementAndGet() >= maximumAttempts) { + regionStates.updateRegionState( + regionState.getRegion(), RegionState.State.FAILED_OPEN); + // remove the tracking info to save memory, also reset + // the count for next open initiative + failedOpenTracker.remove(encodedName); + } else { + getRegionPlan(regionState.getRegion(), sn, true); + this.executorService.submit(new ClosedRegionHandler(server, + this, regionState.getRegion())); + } } break; @@ -914,6 +948,7 @@ // Handle OPENED by removing from transition and deleted zk node regionState = regionStates.updateRegionState(rt, RegionState.State.OPEN); if (regionState != null) { + failedOpenTracker.remove(encodedName); // reset the count, if any this.executorService.submit(new OpenedRegionHandler( server, this, regionState.getRegion(), sn, expectedVersion)); } @@ -1151,6 +1186,8 @@ } } }); + }else { + System.out.println("Node deleted "+path); } } @@ -1718,13 +1755,16 @@ } if (setOfflineInZK && versionOfOfflineNode == -1) { LOG.warn("Unable to set offline in ZooKeeper to assign " + region); - if (!tomActivated) { - regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN); + // Setting offline in ZK must have been failed due to ZK racing or some + // exception which may make the server to abort. 
If it is ZK racing, + // we should retry since we already reset the region state, + // existing (re)assignment will fail anyway. + if (!server.isAborted()) { + continue; } - return; } - if (this.server.isStopped()) { - LOG.debug("Server stopped; skipping assign of " + region); + if (this.server.isStopped() || this.server.isAborted()) { + LOG.debug("Server stopped/aborted; skipping assign of " + region); return; } LOG.info("Assigning region " + region.getRegionNameAsString() + @@ -2408,7 +2448,9 @@ * in META * @throws IOException */ - Map> rebuildUserRegions() throws IOException, KeeperException { + Pair>, Map>> rebuildUserRegions() + throws IOException, KeeperException { + Set creatingTables = ZKTable.getCreatingTables(watcher); Set enablingTables = ZKTable.getEnablingTables(watcher); Set disabledOrEnablingTables = ZKTable.getDisabledTables(watcher); disabledOrEnablingTables.addAll(enablingTables); @@ -2419,9 +2461,12 @@ List results = MetaReader.fullScan(this.catalogTracker); // Get any new but slow to checkin region server that joined the cluster Set onlineServers = serverManager.getOnlineServers().keySet(); + Pair>, Map>> pair = + new Pair>, Map>>(); // Map of offline servers and their regions to be returned Map> offlineServers = new TreeMap>(); + Map> hriOfCreatingTables = new HashMap>(); // Iterate regions in META for (Result result : results) { Pair region = HRegionInfo.getHRegionInfoAndServerName(result); @@ -2447,6 +2492,14 @@ " has null regionLocation." 
+ " But its table " + tableName + " isn't in ENABLING state."); } + // If the table is in CREATING_TABLE state and if the regionLocation is null + // it means they are not yet assigned while table creation + + // Collect the hris for the regions of the table that is under creation + if (creatingTables.contains(tableName)) { + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, true); + } + } else if (!onlineServers.contains(regionLocation)) { // Region is located on a server that isn't online List offlineRegions = offlineServers.get(regionLocation); @@ -2455,11 +2508,14 @@ offlineServers.put(regionLocation, offlineRegions); } offlineRegions.add(regionInfo); + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, false); // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!creatingTables.contains(tableName)) { + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } else { // If region is in offline and split state check the ZKNode @@ -2477,20 +2533,54 @@ } // Region is being served and on an active server // add only if region not in disabled or enabling table + // Even if in CreatingTable state we can make the region as online + // Ensure we take care while recovery if (!disabledOrEnablingTables.contains(tableName)) { regionStates.regionOnline(regionInfo, regionLocation); + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, false); } // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!creatingTables.contains(tableName)) { + if 
(!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } } - return offlineServers; + pair.setFirst(offlineServers); + if(creatingTables.size() != 0 && hriOfCreatingTables.size() == 0){ + // There are some tables that is partially created and not added to the META + for (String tableName : creatingTables) { + LOG.debug("The table "+ tableName+ " found in CREATINGTABLE"); + hriOfCreatingTables.put(tableName, null); + } + } + pair.setSecond(hriOfCreatingTables); + return pair; } + + public Set getCreatingTable() throws KeeperException { + return ZKTable.getCreatingTables(this.watcher); + } + private void populateCreatingTableHRIs( + Map> hriOfCreatingTables, HRegionInfo regionInfo, + String tableName, boolean process) { + if (hriOfCreatingTables.get(tableName) == null) { + Map infoDetails = new HashMap(); + infoDetails.put(regionInfo, process); + LOG.debug("The region " + regionInfo + " is part of partially created table" + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + hriOfCreatingTables.put(tableName, infoDetails); + } else { + LOG.debug("The region " + regionInfo + " is part of partially created table" + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + hriOfCreatingTables.get(tableName).put(regionInfo, process); + } + } + /** * Recover the tables that were not fully moved to DISABLED state. These * tables are in DISABLING state when the master restarted/switched. 
@@ -2538,6 +2628,39 @@ } } } + + private void recoverTableInCreation(Map> hriOfCreatingTable) + throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { + if (hriOfCreatingTable != null) { + Iterator>> iterator = hriOfCreatingTable.entrySet() + .iterator(); + HRegionInfo[] newRegions = null; + while (iterator.hasNext()) { + // Recover by calling CreateTableHandler + Entry> details = iterator.next(); + String tableName = details.getKey(); + Map hriDetails = details.getValue(); + if (hriDetails != null) { + Iterator> hriIterator = hriDetails + .entrySet().iterator(); + newRegions = new HRegionInfo[hriDetails.size()]; + int count = 0; + while (hriIterator.hasNext()) { + Entry hri = hriIterator.next(); + if (hri.getValue() == true) { + newRegions[count] = hri.getKey(); + count++; + } + } + } + LOG.info("The table " + tableName + + " is in partially created state. Hence trying to create the table"); + // Note that the table descriptor is passed as null + new CreateTableHandler(this.server, ((HMaster) this.server).getMasterFileSystem(), null, + server.getConfiguration(), newRegions, (MasterServices) this.server, true, tableName).prepare().process(); + } + } + } /** * Processes list of dead servers from result of META scan and regions in RIT Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy) @@ -20,22 +20,24 @@ import java.io.IOException; import java.io.InterruptedIOException; +import java.util.Arrays; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import 
org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.Server; -import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.master.AssignmentManager; @@ -45,9 +47,14 @@ import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.master.TableLockManager.TableLock; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.State; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; /** * Handler to create a table. 
@@ -63,21 +70,29 @@ private final TableLockManager tableLockManager; private final HRegionInfo [] newRegions; private final TableLock tableLock; + private final boolean isRecovery; + private final String tableName; + private Table.State currentState = null; public CreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo [] newRegions, - MasterServices masterServices) { + MasterServices masterServices, boolean isRecovery, String tableName) { super(server, EventType.C_M_CREATE_TABLE); this.fileSystemManager = fileSystemManager; + // This can be null when the constructor is called from the recovery flow. this.hTableDescriptor = hTableDescriptor; this.conf = conf; this.newRegions = newRegions; this.catalogTracker = masterServices.getCatalogTracker(); this.assignmentManager = masterServices.getAssignmentManager(); this.tableLockManager = masterServices.getTableLockManager(); + this.isRecovery = isRecovery; + this.tableName = (this.hTableDescriptor == null) ? tableName : this.hTableDescriptor + .getNameAsString(); - this.tableLock = this.tableLockManager.writeLock(this.hTableDescriptor.getName() + byte[] name = ((this.hTableDescriptor == null)? Bytes.toBytes(tableName):this.hTableDescriptor.getName()); + this.tableLock = this.tableLockManager.writeLock(name , EventType.C_M_CREATE_TABLE.toString()); } @@ -100,26 +115,46 @@ this.tableLock.acquire(); boolean success = false; try { - String tableName = this.hTableDescriptor.getNameAsString(); - if (MetaReader.tableExists(catalogTracker, tableName)) { - throw new TableExistsException(tableName); + String tableName = this.tableName; + if (!isRecovery) { + if (MetaReader.tableExists(catalogTracker, tableName)) { + throw new TableExistsException(tableName); + } } // If we have multiple client threads trying to create the table at the // same time, given the async nature of the operation, the table // could be in a state where .META. 
table hasn't been updated yet in // the process() function. - // Use enabling state to tell if there is already a request for the same + // Use CREATINGTABLE state to tell if there is already a request for the same // table in progress. This will introduce a new zookeeper call. Given // createTable isn't a frequent operation, that should be ok. //TODO: now that we have table locks, re-evaluate above - try { - if (!this.assignmentManager.getZKTable().checkAndSetEnablingTable(tableName)) { - throw new TableExistsException(tableName); + ZKTable zkTable = this.assignmentManager.getZKTable(); + if (!this.isRecovery) { + try { + if (!zkTable.checkAndSetCreatingTableStates(tableName)) { + throw new TableExistsException(tableName); + } + } catch (KeeperException e) { + throw new IOException("Unable to ensure that the table will be" + + " enabling because of a ZooKeeper issue", e); } - } catch (KeeperException e) { - throw new IOException("Unable to ensure that the table will be" + - " enabling because of a ZooKeeper issue", e); + } else { + try { + this.currentState = zkTable.getCurrentTableCreationState(tableName); + if (this.currentState != null) { + zkTable.setCurrentCreateTableStatus(tableName, this.currentState); + LOG.debug("The table " + + tableName + + " found in " + + this.currentState + + " is partially created and the new master is trying to complete the table creation."); + } + } catch (KeeperException e) { + throw new IOException("Unable to ensure that the table will be" + + " enabling because of a ZooKeeper issue", e); + } } success = true; } finally { @@ -130,6 +165,140 @@ return this; } + private void handlePartiallyCreatedTable(String tableName) throws IOException, KeeperException { + ZKTable zkTable = this.assignmentManager.getZKTable(); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + try { + Path tempDir = null; + FileSystem fs = null; + Path rootDir = null; + Path tableDir = null; + if (currentTableCreationState == 
null) { + LOG.error("The table " + tableName + " could not be recovered. Try recreating the table."); + return; + } + RegionInfoExtractor infoExtractor = null; + List regionInfos = null; + List currentHRegions = null; + switch (currentTableCreationState) { + case CREATING: + case CREATING_TD : + // Just clear the table znode and tableStatus znode. Upto the client + // to recreate the table + // I think handling this case would be difficult without knowing the + // split keys + LOG.error("The table " + tableName + + " could not be recovered. Try recreating the table."); + break; + case CREATING_REGIONINFO: + // Try forming the region infos from the tmp directory. Try creating + // the table once again + tempDir = this.fileSystemManager.getTempDir(); + rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + + Path tempTableDir = new Path(tempDir, tableName); + FileStatus[] listStatus = fs.listStatus(tempTableDir); + if (listStatus != null) { + infoExtractor = new RegionInfoExtractor(tempTableDir, fs, tableName); + // Need to extract the table info here. The tableInfo has to be + // moved to the regiondir + // before we create the regions + regionInfos = infoExtractor.collectRegionInfos(true); + + } + if (regionInfos.size() == 0) { + LOG.error("The table " + tableName + + " could not be recovered. 
Try recreating the table."); + break; + } + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + // Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + setMoveToOrigLocation(tableName, zkTable); + tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + // Move Table temp directory to the hbase root location + if (!fs.rename(tempTableDir, tableDir)) { + throw new IOException("Unable to move table from temp=" + tempTableDir + + " to hbase root=" + tableDir); + } + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + setEnabledState(tableName); + + break; + case MOVING_TO_ORIG_LOCATION: + rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + try { + // Try forming the region infos from the table directory. 
Try + // creating the table once again + tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + infoExtractor = new RegionInfoExtractor(tableDir, fs, tableName); + regionInfos = infoExtractor.collectRegionInfos(false); + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + setEnabledState(tableName); + + } catch (IOException ioe) { + LOG.error("Cannot obtain the tabledescriptor from the tempdir for the table " + + tableName); + } + break; + case ADDING_TO_META: + // Do the assignment of all the regions added to META + try { + regionInfos = MetaReader.getTableRegions(this.catalogTracker, Bytes.toBytes(tableName)); + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + try { + assignmentManager.getRegionStates().createRegionStates(regionInfos); + assignmentManager.assign(regionInfos); + } catch (InterruptedException e) { + LOG.error("Caught " + e + " during round-robin assignment"); + InterruptedIOException ie = new InterruptedIOException(e.getMessage()); + ie.initCause(e); + throw ie; + } + } catch (IOException e) { + LOG.error("Error while collecting the regions for the table " + tableName + + " from META.", e); + } + // zkTable.setStatusAssignUserRegions(tableName); + setEnabledState(tableName); + break; + default: + throw new IllegalArgumentException("Invalid state from table status node for the table " + + tableName); + } + } catch 
(KeeperException e) { + LOG.error("Unable to get data from the znode ", e); + } + } + + private void cleanUpFailedCreation() { + releaseTableLock(); + this.assignmentManager.getZKTable().removeCreateTableStates( + this.tableName); + } + @Override public String toString() { String name = "UnknownServerName"; @@ -137,27 +306,41 @@ name = server.getServerName().toString(); } return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + - this.hTableDescriptor.getNameAsString(); + this.tableName; } @Override public void process() { - String tableName = this.hTableDescriptor.getNameAsString(); - LOG.info("Attempting to create the table " + tableName); + if (!this.isRecovery) { + String tableName = this.tableName; + LOG.info("Attempting to create the table " + tableName); - try { - MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); - if (cpHost != null) { - cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + try { + MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); + if (cpHost != null) { + cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + handleCreateTable(tableName); + completed(null); + if (cpHost != null) { + cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + } catch (Throwable e) { + LOG.error("Error trying to create the table " + tableName, e); + completed(e); } - handleCreateTable(tableName); - completed(null); - if (cpHost != null) { - cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } else { + // This is called when the backup master comes alive and sees a table + // that was partially created + Throwable t = null; + try { + LOG.info("Found table "+tableName +" in partially created state"); + handlePartiallyCreatedTable(tableName); + } catch (Throwable e) { + t = e; + } finally { + completed(t); } - } catch (Throwable e) { - LOG.error("Error trying to create the table " + tableName, e); - completed(e); } 
} @@ -166,14 +349,16 @@ * @param exception null if process() is successful or not null if something has failed. */ protected void completed(final Throwable exception) { + String tableName = this.tableName; releaseTableLock(); - if (exception != null) { + // Create the CREATINGTABLE znode under /hbase/table if it is recovery + if (exception != null || isRecovery) { // Try deleting the enabling node in case of error // If this does not happen then if the client tries to create the table // again with the same Active master // It will block the creation saying TableAlreadyExists. - this.assignmentManager.getZKTable().removeEnablingTable( - this.hTableDescriptor.getNameAsString()); + this.assignmentManager.getZKTable().removeCreateTableStates( + tableName); } } @@ -194,26 +379,78 @@ private void handleCreateTable(String tableName) throws IOException, KeeperException { Path tempdir = fileSystemManager.getTempDir(); FileSystem fs = fileSystemManager.getFileSystem(); + // 1. Set the status in the zookeeper as CREATED_TD + ZKTable zkTable = this.assignmentManager.getZKTable(); + setCreatedTD(tableName, zkTable); - // 1. Create Table Descriptor + // 2. Create Table Descriptor FSTableDescriptors.createTableDescriptor(fs, tempdir, this.hTableDescriptor); + + Path tempTableDir = new Path(tempdir, tableName); Path tableDir = new Path(fileSystemManager.getRootDir(), tableName); + + // 3. Set the status in the zookeeper as CREATED_REGIONINFO + setCreatedRegionInfo(tableName, zkTable); - // 2. Create Regions + // 4. Create Regions List regionInfos = handleCreateHdfsRegions(tempdir, tableName); - // 3. Move Table temp directory to the hbase root location + // 5. Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + setMoveToOrigLocation(tableName, zkTable); + + // 6. Move Table temp directory to the hbase root location if (!fs.rename(tempTableDir, tableDir)) { throw new IOException("Unable to move table from temp=" + tempTableDir + " to hbase root=" + tableDir); } + + // 7. 
Add to meta and assign the regions + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + // 9. Set table enabled flag up in zk. + setEnabledState(tableName); + } + void setMoveToOrigLocation(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + zkTable.setStatusMovingToOriginalLocation(tableName); + } + + void setCreatedRegionInfo(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + zkTable.setStatusCreatingRegionInfo(tableName); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + System.out.println("Should get the correct state " + currentTableCreationState); + } + + void setCreatedTD(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + zkTable.setStatusCreatingTableDescriptor(tableName); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + } + + private void setEnabledState(String tableName) throws IOException { + try { + assignmentManager.getZKTable().setEnabledTable(tableName); + } catch (KeeperException e) { + throw new IOException("Unable to ensure that " + tableName + " will be" + + " enabled because of a ZooKeeper issue", e); + } + } + + void addToMetaAndBulkAssign(String tableName, ZKTable zkTable, + List regionInfos) throws IOException, NoNodeException, KeeperException, + InterruptedIOException { + // 8. Set the status in the zookeeper as ADD_TO_META + setAddedToMeta(tableName, zkTable); + // Note that the regionInfo cannot be 0. The HMaster creation of HRegionInfo[] takes care of it. if (regionInfos != null && regionInfos.size() > 0) { - // 4. Add regions to META + // Add regions to META MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos); + - // 5. 
Trigger immediate assignment of the regions in round-robin fashion + // Trigger immediate assignment of the regions in round-robin fashion try { assignmentManager.getRegionStates().createRegionStates(regionInfos); assignmentManager.assign(regionInfos); @@ -224,14 +461,11 @@ throw ie; } } + } - // 6. Set table enabled flag up in zk. - try { - assignmentManager.getZKTable().setEnabledTable(tableName); - } catch (KeeperException e) { - throw new IOException("Unable to ensure that " + tableName + " will be" + - " enabled because of a ZooKeeper issue", e); - } + void setAddedToMeta(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + zkTable.setStatusToAddingToMeta(tableName); } private void releaseTableLock() { @@ -254,6 +488,6 @@ final String tableName) throws IOException { return ModifyRegionUtils.createRegions(conf, tableRootDir, - hTableDescriptor, newRegions, null); + hTableDescriptor, newRegions, null, true); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -726,6 +726,9 @@ status.setStatus("Initializing ZK system trackers"); initializeZKBasedSystemTrackers(); + Set creatingTable = this.assignmentManager.getCreatingTable(); + // Deletes tmp directory only if there are no tables in partially created state + this.fileSystemManager.checkTempDir(creatingTable); if (!masterRecovery) { // initialize master side coprocessors before we start handling requests @@ -1537,7 +1540,7 @@ this.executorService.submit(new CreateTableHandler(this, this.fileSystemManager, hTableDescriptor, conf, - newRegions, this).prepare()); + newRegions, this, false, hTableDescriptor.getNameAsString()).prepare()); if (cpHost != null) { 
cpHost.postCreateTable(hTableDescriptor, newRegions); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -148,7 +148,7 @@ checkRootDir(this.rootdir, conf, this.fs); // check if temp directory exists and clean it - checkTempDir(this.tempdir, conf, this.fs); + //checkTempDir(this.tempdir, conf, this.fs); Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); @@ -452,25 +452,28 @@ /** * Make sure the hbase temp directory exists and is empty. * NOTE that this method is only executed once just after the master becomes the active one. + * @param creatingTable */ - private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs) + private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs, Set creatingTable) throws IOException { // If the temp directory exists, clear the content (left over, from the previous run) if (fs.exists(tmpdir)) { // Archive table in temp, maybe left over from failed deletion, // if not the cleaner will take care of them. 
for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) { - for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) { - HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + if (!creatingTable.contains(tabledir.getName())) { + for (Path regiondir : FSUtils.getRegionDirs(fs, tabledir)) { + HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + } } } - if (!fs.delete(tmpdir, true)) { + if (creatingTable.size() == 0 && !fs.delete(tmpdir, true)) { throw new IOException("Unable to clean the temp directory: " + tmpdir); } } // Create the temp directory - if (!fs.mkdirs(tmpdir)) { + if (creatingTable.size() == 0 && !fs.mkdirs(tmpdir)) { throw new IOException("HBase temp directory '" + tmpdir + "' creation failure."); } } @@ -648,4 +651,9 @@ this.services.getTableDescriptors().add(htd); return htd; } + + // Deletes the temp directory if there are not tables in partially created state + public void checkTempDir(Set creatingTable) throws IOException { + checkTempDir(this.tempdir, conf, this.fs, creatingTable); + } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (working copy) @@ -253,6 +253,10 @@ newServerName = null; } + if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) { + LOG.warn("Failed to transition " + hri + " on " + serverName + ": " + state); + } + String regionName = hri.getEncodedName(); RegionState regionState = new RegionState( hri, state, System.currentTimeMillis(), newServerName); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java =================================================================== --- 
hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (working copy) @@ -67,7 +67,7 @@ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { super(masterServices, masterServices.getMasterFileSystem(), hTableDescriptor, - masterServices.getConfiguration(), null, masterServices); + masterServices.getConfiguration(), null, masterServices, false, hTableDescriptor.getNameAsString()); // Snapshot information this.snapshot = snapshot; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (working copy) @@ -405,7 +405,7 @@ public void fillRegion(final HRegion region) throws IOException { cloneRegion(region, snapshotRegions.get(region.getRegionInfo().getEncodedName())); } - }); + }, false); return clonedRegionsInfo; } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (working copy) @@ -38,8 +38,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import 
org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.backup.HFileArchiver; @@ -75,7 +77,7 @@ */ public static List createRegions(final Configuration conf, final Path rootDir, final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions) throws IOException { - return createRegions(conf, rootDir, hTableDescriptor, newRegions, null); + return createRegions(conf, rootDir, hTableDescriptor, newRegions, null, false); } /** @@ -91,7 +93,7 @@ */ public static List createRegions(final Configuration conf, final Path rootDir, final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions, - final RegionFillTask task) throws IOException { + final RegionFillTask task, boolean isTableCreation) throws IOException { if (newRegions == null) return null; int regionNumber = newRegions.length; ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool(conf, @@ -99,6 +101,15 @@ CompletionService completionService = new ExecutorCompletionService( regionOpenAndInitThreadPool); List regionInfos = new ArrayList(); + // Create a file that has the number of splitkeys that is given by the user. + // Do this only during table creation + // On failure check for this file first. If this file is found and the number of regioninfos does not match + // and it is in the tmp location we cannot recreate the table. 
+ // But if the region infos matches then we can recreate the table even if it is in the tmp folder + if (isTableCreation) { + Path tableDir = HTableDescriptor.getTableDir(rootDir, hTableDescriptor.getName()); + writeSplitKeysCount(conf, tableDir, newRegions); + } for (final HRegionInfo newRegion : newRegions) { completionService.submit(new Callable() { public HRegionInfo call() throws IOException { @@ -137,6 +148,21 @@ return regionInfos; } + private static void writeSplitKeysCount(final Configuration conf, final Path rootDir, + final HRegionInfo[] newRegions) throws IOException { + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream splitKeyFile = null; + try { + splitKeyFile = fs.create(new Path(rootDir.toString() + Path.SEPARATOR + + HConstants.SPLIT_KEYS_FILE)); + splitKeyFile.writeInt(newRegions.length); + } finally { + if (splitKeyFile != null) { + splitKeyFile.close(); + } + } + } + /* * used by createRegions() to get the thread pool executor based on the * "hbase.hregion.open.and.init.threads.max" property. 
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java (revision 1461042) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java (working copy) @@ -18,22 +18,29 @@ */ package org.apache.hadoop.hbase.master.handler; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.List; +import junit.framework.Assert; + import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MediumTests; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; @@ -41,7 +48,11 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import org.junit.After; import org.junit.AfterClass; +import org.junit.Before; import org.junit.BeforeClass; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -54,21 +65,36 @@ private static final byte[] FAMILYNAME = 
Bytes.toBytes("fam"); public static boolean throwException = false; - @BeforeClass - public static void setUp() throws Exception { - TEST_UTIL.startMiniCluster(1); + public static boolean creating_td = false; + + public static boolean created_regioninfo = false; + public static boolean moved_to_orig = false; + public static boolean added_to_meta = false; + public static boolean put_to_meta = false; + + @Before + public void setUp() throws Exception { + TEST_UTIL.startMiniCluster(2, 1); + creating_td = false; + throwException = false; + created_regioninfo = false; + moved_to_orig = false; + added_to_meta = false; + put_to_meta = false; + } - @AfterClass - public static void tearDown() throws Exception { + @After + public void tearDown() throws Exception { TEST_UTIL.shutdownMiniCluster(); } @Test public void testCreateTableHandlerIfCalledTwoTimesAndFirstOneIsUnderProgress() throws Exception { + byte[] tableName = Bytes.toBytes("testCreateTableHandlerIfCalledTwoTimesAndFirstOneIsUnderProgress"); final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); final HMaster m = cluster.getMaster(); - final HTableDescriptor desc = new HTableDescriptor(TABLENAME); + final HTableDescriptor desc = new HTableDescriptor(tableName); desc.addFamily(new HColumnDescriptor(FAMILYNAME)); final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, null) }; @@ -83,19 +109,231 @@ handler1.prepare(); handler1.process(); for (int i = 0; i < 100; i++) { - if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { Thread.sleep(200); } } - assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(TABLENAME)); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName)); } + @Test + public void testMasterRestartOnCreateTableAfterCreatingTD() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingTD"); + final MiniHBaseCluster cluster = 
TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + creating_td = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + @Test + public void testMasterRestartOnCreateTableAfterCreatingRegionInfo() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingRegionInfo"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + created_regioninfo = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + 
assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + @Test + public void testMasterRestartOnCreateTableAfterCreatingAddedToMeta() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingAddedToMeta"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + added_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + @Test + public void testMasterRestartOnCreateTableAfterCreatingAddedToMetaWithMoreSplitKeys() + throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingAddedToMetaWithMoreSplitKeys"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { + new HRegionInfo(desc.getName(), HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes("a")), + new HRegionInfo(desc.getName(), Bytes.toBytes("a"), Bytes.toBytes("b")), + new HRegionInfo(desc.getName(), Bytes.toBytes("b"), Bytes.toBytes("c")), + new HRegionInfo(desc.getName(), Bytes.toBytes("c"), 
HConstants.EMPTY_BYTE_ARRAY) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + added_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(4, regionCount); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName)); + } + + @Test + public void testMasterRestartOnCreateTableWhenAddingToMetaFails() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableWhenAddingToMetaFails"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { + new HRegionInfo(desc.getName(), HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes("a")), + new HRegionInfo(desc.getName(), Bytes.toBytes("a"), Bytes.toBytes("b")), + new HRegionInfo(desc.getName(), Bytes.toBytes("b"), Bytes.toBytes("c")), + new HRegionInfo(desc.getName(), Bytes.toBytes("c"), HConstants.EMPTY_BYTE_ARRAY) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + put_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if 
(!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(4, regionCount); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName)); + } + + @Test + public void testMasterRestartAfterMovedToOrigLocation() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartAfterMovedToOrigLocation"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + moved_to_orig = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + assertTrue(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + @Test + public void testMasterRestartAfterMovedToOrigLocationWithMoreSplitKeys() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartAfterMovedToOrigLocationWithMoreSplitKeys"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { + new HRegionInfo(desc.getName(), HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes("a")), + new HRegionInfo(desc.getName(), 
Bytes.toBytes("a"), Bytes.toBytes("b")), + new HRegionInfo(desc.getName(), Bytes.toBytes("b"), Bytes.toBytes("c")), + new HRegionInfo(desc.getName(), Bytes.toBytes("c"), HConstants.EMPTY_BYTE_ARRAY) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + moved_to_orig = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(4, regionCount); + } + private static class CustomCreateTableHandler extends CreateTableHandler { public CustomCreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo[] newRegions, MasterServices masterServices) { - super(server, fileSystemManager, hTableDescriptor, conf, newRegions, masterServices); + super(server, fileSystemManager, hTableDescriptor, conf, newRegions, masterServices, false, + hTableDescriptor.getNameAsString()); } @Override @@ -106,5 +344,55 @@ } return super.handleCreateHdfsRegions(tableRootDir, tableName); } + + @Override + void setCreatedTD(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + if (creating_td) { + throw new IOException("Failure on Set creating table descriptor"); + } else { + super.setCreatedTD(tableName, zkTable); + } + } + + @Override + void setMoveToOrigLocation(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + if (moved_to_orig) { + throw new IOException("Failure on Set Moving to original location"); + } else { + 
super.setMoveToOrigLocation(tableName, zkTable); + } + } + + @Override + void addToMetaAndBulkAssign(String tableName, ZKTable zkTable, List regionInfos) + throws IOException, NoNodeException, KeeperException, InterruptedIOException { + if (put_to_meta) { + throw new IOException("Failure on doing put to META."); + } else { + super.addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + } + } + + @Override + void setCreatedRegionInfo(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + if (created_regioninfo) { + throw new IOException("Failure on Set Creating region info"); + } else { + super.setCreatedRegionInfo(tableName, zkTable); + } + } + + @Override + void setAddedToMeta(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + if (added_to_meta) { + throw new IOException("Failure on adding to meta"); + } else { + super.setAddedToMeta(tableName, zkTable); + } + } } }