Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (revision 1463118) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (working copy) @@ -23,7 +23,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.State; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; import java.util.HashMap; import java.util.HashSet; @@ -58,14 +61,18 @@ */ private final Map cache = new HashMap(); - + + private final static Table.State createTableStates[] = { Table.State.CREATING, Table.State.CREATING_TD, + Table.State.CREATING_REGIONINFO, Table.State.MOVING_TO_ORIG_LOCATION, + Table.State.ADDING_TO_META }; + // TODO: Make it so always a table znode. Put table schema here as well as table state. // Have watcher on table znode so all are notified of state or schema change. public ZKTable(final ZooKeeperWatcher zkw) throws KeeperException { super(); this.watcher = zkw; - populateTableStates(); + populateTableStates(); } /** @@ -79,6 +86,12 @@ if (children == null) return; for (String child: children) { ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(this.watcher, child); + if(state == ZooKeeperProtos.Table.State.CREATING){ + List listChildrenNoWatch = ZKUtil.listChildrenNoWatch(this.watcher, this.watcher.tableZNode+"/"+child); + for (String string : listChildrenNoWatch) { + System.out.println("Status node should be prenet "+string); + } + } if (state != null) this.cache.put(child, state); } } @@ -136,6 +149,49 @@ } /** + * If the table is found in CREATING_TABLE state the inmemory state is + * removed. This helps in cases where CreateTable is to be retried by the + * client incase of failures + * + * @param tableName + */ + public void removeTableStateForTableInCreation(final String tableName) { + synchronized (this.cache) { + if (isTableCreationState(tableName)) { + this.cache.remove(tableName); + } + + } + } + + public static Set getTablesInCreation(ZooKeeperWatcher zkw) throws KeeperException { + return getAllTables(zkw, createTableStates); + } + + + public void setStatusCreatingTableDescriptor(final String tableName) throws KeeperException { + setTableState(tableName, Table.State.CREATING_TD); + } + + public void setCurrentCreateTableStatus(final String tableName, final Table.State state) + throws KeeperException { + setTableState(tableName, state); + } + + public void setStatusCreatingRegionInfo(final String tableName) throws NoNodeException, + KeeperException { + setTableState(tableName, Table.State.CREATING_REGIONINFO); + } + + public void setStatusMovingToOriginalLocation(final String tableName) throws KeeperException { + setTableState(tableName, Table.State.MOVING_TO_ORIG_LOCATION); + } + + public void setStatusToAddingToMeta(final String tableName) throws KeeperException { + setTableState(tableName, Table.State.ADDING_TO_META); + } + + /** * Sets the specified table as ENABLING in zookeeper atomically * If the table is already in ENABLING state, no operation is performed * @param tableName @@ -152,6 +208,27 @@ return true; } } + + /** + * Sets the specified table in CREATING Table states in zookeeper atomically + * If the table is already in CREATING tables state, no operation is performed + * @param tableName + * @return if the operation succeeds or not + * @throws KeeperException unexpected zookeeper exception + */ + public boolean checkAndSetCreatingTableStates(final String tableName) throws KeeperException { + synchronized (this.cache) { + if (isTableCreationState(tableName)) { + return false; + } + setTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + return true; + } + } + + public Table.State getCurrentTableCreationState(String tableName) throws KeeperException { + return ZKTableReadOnly.getTableState(this.watcher, tableName); + } /** * Sets the specified table as ENABLING in zookeeper atomically @@ -215,7 +292,22 @@ public boolean isEnablingTable(final String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } + + public boolean isTableCreationState (final String tableName){ + return isTableInCreation(tableName, this.createTableStates); + } + + private boolean isTableInCreation(String tableName, State[] createTableStates) { + synchronized (this.cache) { + ZooKeeperProtos.Table.State currentState = this.cache.get(tableName); + return ZKTableReadOnly.isTableState(currentState, createTableStates); + } + } + public boolean isCreatingTable(final String tableName) { + return isTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + } + public boolean isEnabledTable(String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLED); } @@ -273,6 +365,8 @@ public void setEnabledTable(final String tableName) throws KeeperException { setTableState(tableName, ZooKeeperProtos.Table.State.ENABLED); } + + /** * check if table is present . @@ -374,6 +468,7 @@ ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(zkw, child); for (ZooKeeperProtos.Table.State expectedState: states) { if (state == expectedState) { + LOG.debug("The table "+child+" found in "+state+" state"); allTables.add(child); break; } @@ -381,4 +476,5 @@ } return allTables; } + } Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java (revision 1463118) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTableReadOnly.java (working copy) @@ -23,6 +23,7 @@ import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.State; import org.apache.zookeeper.KeeperException; import java.util.HashSet; @@ -156,4 +157,18 @@ throw ZKUtil.convert(e); } } + + public static boolean isTableState(State currentState) { + return false; + } + + public static boolean isTableState(State currentState, State[] createTableStates) { + for (State state : createTableStates) { + if (currentState != null) { + if (state.equals(currentState)) + return true; + } + } + return false; + } } Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 1463118) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy) @@ -105,6 +105,8 @@ public String balancerZNode; // znode containing the lock for the tables public String tableLockZNode; + // znode that has the current state of the create table + public String tableCreationStatusNode = "status"; // Certain ZooKeeper nodes need to be world-readable public static final ArrayList CREATOR_ALL_AND_WORLD_READABLE = @@ -220,6 +222,9 @@ conf.get("zookeeper.znode.balancer", "balancer")); tableLockZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.tableLock", "table-lock")); + + tableLockZNode = ZKUtil.joinZNode(tableZNode, + conf.get("zookeeper.znode.tatbleStatus", "table-status")); } /** Index: hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1463118) +++ hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -324,6 +324,9 @@ /** Default value for cluster ID */ public static final String CLUSTER_ID_DEFAULT = "default-cluster"; + + /** Contains the count of Split keys specified during table creations **/ + public static final String SPLIT_KEYS_FILE = "splitKeys"; // Always store the location of the root table's HRegion. // This HRegion is never split. Index: hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java =================================================================== --- hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (revision 1463118) +++ hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (working copy) @@ -2812,12 +2812,22 @@ DISABLED(1, 1), DISABLING(2, 2), ENABLING(3, 3), + CREATING(4, 4), + CREATING_TD(5, 5), + CREATING_REGIONINFO(6, 6), + MOVING_TO_ORIG_LOCATION(7, 7), + ADDING_TO_META(8, 8), ; public static final int ENABLED_VALUE = 0; public static final int DISABLED_VALUE = 1; public static final int DISABLING_VALUE = 2; public static final int ENABLING_VALUE = 3; + public static final int CREATING_VALUE = 4; + public static final int CREATING_TD_VALUE = 5; + public static final int CREATING_REGIONINFO_VALUE = 6; + public static final int MOVING_TO_ORIG_LOCATION_VALUE = 7; + public static final int ADDING_TO_META_VALUE = 8; public final int getNumber() { return value; } @@ -2828,6 +2838,11 @@ case 1: return DISABLED; case 2: return DISABLING; case 3: return ENABLING; + case 4: return CREATING; + case 5: return CREATING_TD; + case 6: return CREATING_REGIONINFO; + case 7: return MOVING_TO_ORIG_LOCATION; + case 8: return ADDING_TO_META; default: return null; } } @@ -2858,7 +2873,7 @@ } private static final State[] VALUES = { - ENABLED, DISABLED, DISABLING, ENABLING, + ENABLED, DISABLED, DISABLING, ENABLING, CREATING, CREATING_TD, CREATING_REGIONINFO, MOVING_TO_ORIG_LOCATION, ADDING_TO_META, }; public static State valueOf( @@ -5750,19 +5765,22 @@ "plitLogTask.State\022\037\n\nserverName\030\002 \002(\0132\013." + "ServerName\"C\n\005State\022\016\n\nUNASSIGNED\020\000\022\t\n\005O", "WNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n\004DONE\020\003\022\007\n\003ERR\020\004\"" + - "n\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007E" + - "NABLED\"?\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED" + - "\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\"%\n\017Repli" + - "cationPeer\022\022\n\nclusterkey\030\001 \002(\t\"^\n\020Replic" + - "ationState\022&\n\005state\030\001 \002(\0162\027.ReplicationS" + - "tate.State\"\"\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISA" + - "BLED\020\001\"+\n\027ReplicationHLogPosition\022\020\n\010pos" + - "ition\030\001 \002(\003\"$\n\017ReplicationLock\022\021\n\tlockOw" + - "ner\030\001 \002(\t\"s\n\tTableLock\022\021\n\ttableName\030\001 \001(", - "\014\022\036\n\tlockOwner\030\002 \001(\0132\013.ServerName\022\020\n\010thr" + - "eadId\030\003 \001(\003\022\020\n\010isShared\030\004 \001(\010\022\017\n\007purpose" + - "\030\005 \001(\tBE\n*org.apache.hadoop.hbase.protob" + - "uf.generatedB\017ZooKeeperProtosH\001\210\001\001\240\001\001" + "\330\001\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007" + + "ENABLED\"\250\001\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABL" + + "ED\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\022\014\n\010CRE" + + "ATING\020\004\022\017\n\013CREATING_TD\020\005\022\027\n\023CREATING_REG" + + "IONINFO\020\006\022\033\n\027MOVING_TO_ORIG_LOCATION\020\007\022\022" + + "\n\016ADDING_TO_META\020\010\"%\n\017ReplicationPeer\022\022\n" + + "\nclusterkey\030\001 \002(\t\"^\n\020ReplicationState\022&\n" + + "\005state\030\001 \002(\0162\027.ReplicationState.State\"\"\n" + + "\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\"+\n\027Rep", + "licationHLogPosition\022\020\n\010position\030\001 \002(\003\"$" + + "\n\017ReplicationLock\022\021\n\tlockOwner\030\001 \002(\t\"s\n\t" + + "TableLock\022\021\n\ttableName\030\001 \001(\014\022\036\n\tlockOwne" + + "r\030\002 \001(\0132\013.ServerName\022\020\n\010threadId\030\003 \001(\003\022\020" + + "\n\010isShared\030\004 \001(\010\022\017\n\007purpose\030\005 \001(\tBE\n*org" + + ".apache.hadoop.hbase.protobuf.generatedB" + + "\017ZooKeeperProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { Index: hbase-protocol/src/main/protobuf/ZooKeeper.proto =================================================================== --- hbase-protocol/src/main/protobuf/ZooKeeper.proto (revision 1463118) +++ hbase-protocol/src/main/protobuf/ZooKeeper.proto (working copy) @@ -93,6 +93,11 @@ DISABLED = 1; DISABLING = 2; ENABLING = 3; + CREATING = 4; + CREATING_TD = 5; + CREATING_REGIONINFO = 6; + MOVING_TO_ORIG_LOCATION = 7; + ADDING_TO_META = 8; } // This is the table's state. If no znode for a table, // its state is presumed enabled. See o.a.h.h.zookeeper.ZKTable class Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; @@ -54,15 +55,18 @@ import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.exceptions.NotServingRegionException; import org.apache.hadoop.hbase.exceptions.RegionAlreadyInTransitionException; import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException; import org.apache.hadoop.hbase.exceptions.ServerNotRunningYetException; +import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.exceptions.TableNotFoundException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; +import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.MergedRegionHandler; @@ -363,11 +367,12 @@ /** * Called on startup. * Figures whether a fresh cluster start of we are joining extant running cluster. + * @param tableUnderCreation * @throws IOException * @throws KeeperException * @throws InterruptedException */ - void joinCluster() throws IOException, + void joinCluster(Set tableUnderCreation) throws IOException, KeeperException, InterruptedException { // Concurrency note: In the below the accesses on regionsInTransition are // outside of a synchronization block where usually all accesses to RIT are @@ -379,7 +384,9 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions - Map> deadServers = rebuildUserRegions(); + Pair>, Map>> recoveryInfo + = rebuildUserRegions(tableUnderCreation); + Map> deadServers = recoveryInfo.getFirst(); // This method will assign all user regions if a clean server startup or // it will reconstruct master state and cleanup any leftovers from @@ -388,6 +395,8 @@ recoverTableInDisablingState(); recoverTableInEnablingState(); + + recoverTableInCreation(recoveryInfo.getSecond()); } /** @@ -2454,11 +2463,13 @@ *

* Returns a map of servers that are not found to be online and the regions * they were hosting. + * @param tableUnderCreation * @return map of servers not online to their assigned regions, as stored * in META * @throws IOException */ - Map> rebuildUserRegions() throws IOException, KeeperException { + Pair>, Map>> rebuildUserRegions( + Set tableUnderCreation) throws IOException, KeeperException { Set enablingTables = ZKTable.getEnablingTables(watcher); Set disabledOrEnablingTables = ZKTable.getDisabledTables(watcher); disabledOrEnablingTables.addAll(enablingTables); @@ -2469,9 +2480,12 @@ List results = MetaReader.fullScan(this.catalogTracker); // Get any new but slow to checkin region server that joined the cluster Set onlineServers = serverManager.getOnlineServers().keySet(); + Pair>, Map>> pair = + new Pair>, Map>>(); // Map of offline servers and their regions to be returned Map> offlineServers = new TreeMap>(); + Map> hriOfTableUnderCreation = new HashMap>(); // Iterate regions in META for (Result result : results) { Pair region = HRegionInfo.getHRegionInfoAndServerName(result); @@ -2497,6 +2511,14 @@ " has null regionLocation." + " But its table " + tableName + " isn't in ENABLING state."); } + // If the table is under creation and if the regionLocation is null + // it means they are not yet assigned while table creation + + // Collect the hris for the regions of the table that is under creation + if (tableUnderCreation.contains(tableName)) { + populateCreatingTableHRIs(hriOfTableUnderCreation, regionInfo, tableName, true); + } + } else if (!onlineServers.contains(regionLocation)) { // Region is located on a server that isn't online List offlineRegions = offlineServers.get(regionLocation); @@ -2505,11 +2527,14 @@ offlineServers.put(regionLocation, offlineRegions); } offlineRegions.add(regionInfo); + populateCreatingTableHRIs(hriOfTableUnderCreation, regionInfo, tableName, false); // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!tableUnderCreation.contains(tableName)) { + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } else { // If region is in offline and split state check the ZKNode @@ -2527,20 +2552,56 @@ } // Region is being served and on an active server // add only if region not in disabled or enabling table + // Even if in table is under creation state we can make the region as + // online + // Ensure we take care while recovery if (!disabledOrEnablingTables.contains(tableName)) { regionStates.regionOnline(regionInfo, regionLocation); + populateCreatingTableHRIs(hriOfTableUnderCreation, regionInfo, tableName, false); } // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!tableUnderCreation.contains(tableName)) { + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } } - return offlineServers; + pair.setFirst(offlineServers); + if (tableUnderCreation.size() != 0 && hriOfTableUnderCreation.size() == 0) { + // There are some tables that is partially created and not added to the + // META + for (String tableName : tableUnderCreation) { + LOG.debug("The table " + tableName + " found in CREATINGTABLE"); + hriOfTableUnderCreation.put(tableName, null); + } + } + pair.setSecond(hriOfTableUnderCreation); + return pair; } + + public Set getCreatingTable() throws KeeperException { + return ZKTable.getTablesInCreation(this.watcher); + } + private void populateCreatingTableHRIs( + Map> tablesUnderCreation, HRegionInfo regionInfo, + String tableName, boolean process) { + if (tablesUnderCreation.get(tableName) == null) { + Map infoDetails = new HashMap(); + infoDetails.put(regionInfo, process); + LOG.debug("The region " + regionInfo + " is part of partially created table " + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + tablesUnderCreation.put(tableName, infoDetails); + } else { + LOG.debug("The region " + regionInfo + " is part of partially created table " + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + tablesUnderCreation.get(tableName).put(regionInfo, process); + } + } + /** * Recover the tables that were not fully moved to DISABLED state. These * tables are in DISABLING state when the master restarted/switched. @@ -2588,6 +2649,39 @@ } } } + + private void recoverTableInCreation(Map> hriOfCreatingTable) + throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { + if (hriOfCreatingTable != null) { + Iterator>> iterator = hriOfCreatingTable.entrySet() + .iterator(); + HRegionInfo[] newRegions = null; + while (iterator.hasNext()) { + // Recover by calling CreateTableHandler + Entry> details = iterator.next(); + String tableName = details.getKey(); + Map hriDetails = details.getValue(); + if (hriDetails != null) { + Iterator> hriIterator = hriDetails + .entrySet().iterator(); + newRegions = new HRegionInfo[hriDetails.size()]; + int count = 0; + while (hriIterator.hasNext()) { + Entry hri = hriIterator.next(); + if (hri.getValue() == true) { + newRegions[count] = hri.getKey(); + count++; + } + } + } + LOG.info("The table " + tableName + + " is in partially created state. Hence trying to create the table"); + // Note that the table descriptor is passed as null + new CreateTableHandler(this.server, ((HMaster) this.server).getMasterFileSystem(), null, + server.getConfiguration(), newRegions, (MasterServices) this.server, true, tableName).prepare().process(); + } + } + } /** * Processes list of dead servers from result of META scan and regions in RIT Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy) @@ -20,22 +20,24 @@ import java.io.IOException; import java.io.InterruptedIOException; +import java.util.Arrays; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.Server; -import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.catalog.CatalogTracker; import org.apache.hadoop.hbase.catalog.MetaEditor; import org.apache.hadoop.hbase.catalog.MetaReader; +import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.master.AssignmentManager; @@ -45,9 +47,14 @@ import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.master.TableLockManager.TableLock; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.State; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; /** * Handler to create a table. @@ -63,21 +70,30 @@ private final TableLockManager tableLockManager; private final HRegionInfo [] newRegions; private final TableLock tableLock; + // this is set to true when master recovery happens + private final boolean isRecovery; + private final String tableName; + private Table.State currentState = null; public CreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo [] newRegions, - MasterServices masterServices) { + MasterServices masterServices, boolean isRecovery, String tableName) { super(server, EventType.C_M_CREATE_TABLE); this.fileSystemManager = fileSystemManager; + // This can be null when the constructor is called from the recovery flow. this.hTableDescriptor = hTableDescriptor; this.conf = conf; this.newRegions = newRegions; this.catalogTracker = masterServices.getCatalogTracker(); this.assignmentManager = masterServices.getAssignmentManager(); this.tableLockManager = masterServices.getTableLockManager(); + this.isRecovery = isRecovery; + this.tableName = (this.hTableDescriptor == null) ? tableName : this.hTableDescriptor + .getNameAsString(); - this.tableLock = this.tableLockManager.writeLock(this.hTableDescriptor.getName() + byte[] name = ((this.hTableDescriptor == null)? Bytes.toBytes(tableName):this.hTableDescriptor.getName()); + this.tableLock = this.tableLockManager.writeLock(name , EventType.C_M_CREATE_TABLE.toString()); } @@ -100,26 +116,48 @@ this.tableLock.acquire(); boolean success = false; try { - String tableName = this.hTableDescriptor.getNameAsString(); - if (MetaReader.tableExists(catalogTracker, tableName)) { - throw new TableExistsException(tableName); + String tableName = this.tableName; + if (!isRecovery) { + if (MetaReader.tableExists(catalogTracker, tableName)) { + throw new TableExistsException(tableName); + } } // If we have multiple client threads trying to create the table at the // same time, given the async nature of the operation, the table // could be in a state where .META. table hasn't been updated yet in // the process() function. - // Use enabling state to tell if there is already a request for the same - // table in progress. This will introduce a new zookeeper call. Given - // createTable isn't a frequent operation, that should be ok. //TODO: now that we have table locks, re-evaluate above - try { - if (!this.assignmentManager.getZKTable().checkAndSetEnablingTable(tableName)) { - throw new TableExistsException(tableName); + ZKTable zkTable = this.assignmentManager.getZKTable(); + // do not check if the table exists while in recovery because that is what + // we want to do here + if (!this.isRecovery) { + try { + // Move the tale znode to a new state so that we are sure that the table is under creation + if (!zkTable.checkAndSetCreatingTableStates(tableName)) { + throw new TableExistsException(tableName); + } + } catch (KeeperException e) { + throw new IOException("Unable to ensure that the table will be" + + " enabling because of a ZooKeeper issue", e); } - } catch (KeeperException e) { - throw new IOException("Unable to ensure that the table will be" + - " enabling because of a ZooKeeper issue", e); + } else { + try { + // Get the current state of the table here. + this.currentState = zkTable.getCurrentTableCreationState(tableName); + if (this.currentState != null) { + // Populate the current state in the ZKTable cache + zkTable.setCurrentCreateTableStatus(tableName, this.currentState); + LOG.debug("The table " + + tableName + + " found in " + + this.currentState + + " is partially created and the new master is trying to complete the table creation."); + } + } catch (KeeperException e) { + throw new IOException("Unable to ensure that the table will be" + + " enabling because of a ZooKeeper issue", e); + } } success = true; } finally { @@ -130,6 +168,137 @@ return this; } + private void handlePartiallyCreatedTable(String tableName) throws IOException, KeeperException { + ZKTable zkTable = this.assignmentManager.getZKTable(); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + try { + Path tempDir = null; + FileSystem fs = null; + Path rootDir = null; + Path tableDir = null; + if (currentTableCreationState == null) { + LOG.fatal("The table " + tableName + " could not be recovered. Try recreating the table."); + return; + } + RegionInfoExtractor infoExtractor = null; + List regionInfos = null; + List currentHRegions = null; + switch (currentTableCreationState) { + case CREATING: + case CREATING_TD : + // Just clear the table znode and tableStatus znode. Upto the client + // to recreate the table + // Handling this case would be difficult without knowing the + // split keys + LOG.fatal("The table " + tableName + + " could not be recovered. Try recreating the table."); + break; + case CREATING_REGIONINFO: + // Try forming the region infos from the tmp directory. Try creating + // the table once again + tempDir = this.fileSystemManager.getTempDir(); + rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + + Path tempTableDir = new Path(tempDir, tableName); + FileStatus[] listStatus = fs.listStatus(tempTableDir); + if (listStatus != null) { + infoExtractor = new RegionInfoExtractor(tempTableDir, fs, tableName); + // Need to extract the table info here. The tableInfo has to be + // moved to the regiondir + // before we create the regions + regionInfos = infoExtractor.collectRegionInfos(true); + + } + if (regionInfos.size() == 0) { + LOG.fatal("The table "+ tableName + + " could not be recovered because no regioninfos were found. Try recreating the table."); + break; + } + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + // Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + setMoveToOrigLocation(tableName, zkTable); + tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + // Move Table temp directory to the hbase root location + if (!fs.rename(tempTableDir, tableDir)) { + throw new IOException("Unable to move table from temp=" + tempTableDir + + " to hbase root=" + tableDir); + } + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + setEnabledState(tableName); + + break; + case MOVING_TO_ORIG_LOCATION: + rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + try { + // Try forming the region infos from the table directory. Try + // creating the table once again + tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + infoExtractor = new RegionInfoExtractor(tableDir, fs, tableName); + regionInfos = infoExtractor.collectRegionInfos(false); + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + setEnabledState(tableName); + + } catch (IOException ioe) { + LOG.error("Cannot obtain the tabledescriptor from the tempdir for the table " + + tableName); + throw ioe; + } + break; + case ADDING_TO_META: + // Do the assignment of all the regions added to META + try { + regionInfos = MetaReader.getTableRegions(this.catalogTracker, Bytes.toBytes(tableName)); + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + try { + assignmentManager.getRegionStates().createRegionStates(regionInfos); + assignmentManager.assign(regionInfos); + } catch (InterruptedException e) { + LOG.error("Caught " + e + " during round-robin assignment"); + InterruptedIOException ie = new InterruptedIOException(e.getMessage()); + ie.initCause(e); + throw ie; + } + } catch (IOException e) { + LOG.error("Error while collecting the regions for the table " + tableName + + " from META.", e); + throw e; + } + // zkTable.setStatusAssignUserRegions(tableName); + setEnabledState(tableName); + break; + default: + throw new IllegalArgumentException("Invalid state from table status node for the table " + + tableName); + } + } catch (KeeperException e) { + LOG.error("Unable to get data from the znode while recreating the table "+tableName, e); + throw e; + } + } + @Override public String toString() { String name = "UnknownServerName"; @@ -137,27 +306,43 @@ name = server.getServerName().toString(); } return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + - this.hTableDescriptor.getNameAsString(); + this.tableName; } @Override public void process() { - String tableName = this.hTableDescriptor.getNameAsString(); - LOG.info("Attempting to create the table " + tableName); + if (!this.isRecovery) { + String tableName = this.tableName; + LOG.info("Attempting to create the table " + tableName); - try { - MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); - if (cpHost != null) { - cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + try { + MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); + if (cpHost != null) { + cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + handleCreateTable(tableName); + completed(null); + if (cpHost != null) { + cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + } catch (Throwable e) { + LOG.error("Error trying to create the table " + tableName, e); + completed(e); } - handleCreateTable(tableName); - completed(null); - if (cpHost != null) { - cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } else { + // This is called when the backup master comes alive and sees a table + // that was partially created + // TODO: should we call coprocessor hooks also here. + // Currently in restart case the htableDescriptor may be null + Throwable t = null; + try { + handlePartiallyCreatedTable(tableName); + } catch (Throwable e) { + LOG.fatal("Cannot recreate the table "+tableName); + t = e; + } finally { + completed(t); } - } catch (Throwable e) { - LOG.error("Error trying to create the table " + tableName, e); - completed(e); } } @@ -166,14 +351,15 @@ * @param exception null if process() is successful or not null if something has failed. */ protected void completed(final Throwable exception) { + String tableName = this.tableName; releaseTableLock(); - if (exception != null) { + if (exception != null || isRecovery) { // Try deleting the enabling node in case of error // If this does not happen then if the client tries to create the table // again with the same Active master // It will block the creation saying TableAlreadyExists. - this.assignmentManager.getZKTable().removeEnablingTable( - this.hTableDescriptor.getNameAsString()); + this.assignmentManager.getZKTable().removeTableStateForTableInCreation( + tableName); } } @@ -194,26 +380,77 @@ private void handleCreateTable(String tableName) throws IOException, KeeperException { Path tempdir = fileSystemManager.getTempDir(); FileSystem fs = fileSystemManager.getFileSystem(); + // 1. Set the status in the zookeeper as CREATED_TD + ZKTable zkTable = this.assignmentManager.getZKTable(); + setCreatedTD(tableName, zkTable); - // 1. Create Table Descriptor + // 2. Create Table Descriptor FSTableDescriptors.createTableDescriptor(fs, tempdir, this.hTableDescriptor); + + Path tempTableDir = new Path(tempdir, tableName); Path tableDir = new Path(fileSystemManager.getRootDir(), tableName); + + // 3. Set the status in the zookeeper as CREATED_REGIONINFO + setCreatedRegionInfo(tableName, zkTable); - // 2. Create Regions + // 4. Create Regions List regionInfos = handleCreateHdfsRegions(tempdir, tableName); - // 3. Move Table temp directory to the hbase root location + // 5. Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + setMoveToOrigLocation(tableName, zkTable); + + // 6. Move Table temp directory to the hbase root location if (!fs.rename(tempTableDir, tableDir)) { throw new IOException("Unable to move table from temp=" + tempTableDir + " to hbase root=" + tableDir); } + + // 7. Add to meta and assign the regions + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + // 9. Set table enabled flag up in zk. + setEnabledState(tableName); + } + void setMoveToOrigLocation(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + zkTable.setStatusMovingToOriginalLocation(tableName); + } + + void setCreatedRegionInfo(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + zkTable.setStatusCreatingRegionInfo(tableName); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + System.out.println("Should get the correct state " + currentTableCreationState); + } + + void setCreatedTD(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + zkTable.setStatusCreatingTableDescriptor(tableName); + } + + private void setEnabledState(String tableName) throws IOException { + try { + assignmentManager.getZKTable().setEnabledTable(tableName); + } catch (KeeperException e) { + throw new IOException("Unable to ensure that " + tableName + " will be" + + " enabled because of a ZooKeeper issue", e); + } + } + + void addToMetaAndBulkAssign(String tableName, ZKTable zkTable, + List regionInfos) throws IOException, NoNodeException, KeeperException, + InterruptedIOException { + // 8. Set the status in the zookeeper as ADD_TO_META + setAddedToMeta(tableName, zkTable); + // Note that the regionInfo cannot be 0. The HMaster creation of HRegionInfo[] takes care of it. if (regionInfos != null && regionInfos.size() > 0) { - // 4. Add regions to META + // Add regions to META MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos); + - // 5. Trigger immediate assignment of the regions in round-robin fashion + // Trigger immediate assignment of the regions in round-robin fashion try { assignmentManager.getRegionStates().createRegionStates(regionInfos); assignmentManager.assign(regionInfos); @@ -224,14 +461,11 @@ throw ie; } } + } - // 6. Set table enabled flag up in zk. - try { - assignmentManager.getZKTable().setEnabledTable(tableName); - } catch (KeeperException e) { - throw new IOException("Unable to ensure that " + tableName + " will be" + - " enabled because of a ZooKeeper issue", e); - } + void setAddedToMeta(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + zkTable.setStatusToAddingToMeta(tableName); } private void releaseTableLock() { @@ -254,6 +488,6 @@ final String tableName) throws IOException { return ModifyRegionUtils.createRegions(conf, tableRootDir, - hTableDescriptor, newRegions, null); + hTableDescriptor, newRegions, null, true); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java (revision 0) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java (working copy) @@ -0,0 +1,107 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +import org.apache.hadoop.hbase.util.Bytes; + +/** + * This class tries to find out how many regioninfos were created during table creation. + * Based on the directory passed, it check s either the region directory or the tmp directory. + * + */ +public class RegionInfoExtractor { + + private final Path path; + private final FileSystem fs; + private final String tableName; + private static final Log LOG = LogFactory.getLog(RegionInfoExtractor.class); + + public RegionInfoExtractor(Path path, FileSystem fs, String tableName) { + this.path = path; + this.fs = fs; + this.tableName = tableName; + } + + public List collectRegionInfos(boolean useTempDir) throws IOException { + // get the splitKeys file. If this file is found we will be able to check if + // all the regioninfos + // were created before the master went down + FileStatus[] listStatus = fs.listStatus(this.path); + TreeSet hris = new TreeSet(new RegionInfoComparatorBasedOnStartKey()); + int noOfSplitKeys = -1; + for (FileStatus regionDir : listStatus) { + if (useTempDir && !regionDir.isDir() + && regionDir.getPath().getName().equals(HConstants.SPLIT_KEYS_FILE)) { + FSDataInputStream splitKeyFile = null; + try { + splitKeyFile = fs.open(regionDir.getPath()); + noOfSplitKeys = splitKeyFile.readInt(); + } finally { + if (splitKeyFile != null) { + splitKeyFile.close(); + } + } + } + if (regionDir.isDir() && !regionDir.getPath().getName().endsWith(".tmp")) { + HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir.getPath()); + LOG.debug("The hri found in the regioninfo file in the path " + regionDir.getPath() + + " is " + hri.getRegionNameAsString()); + hris.add(hri); + } + } + // When we are trying to get the regioninfos from the region dir, then it + // means that + // the regioninfos were successfully created. + if (useTempDir) { + if (noOfSplitKeys != hris.size()) { + return new LinkedList(); + } + } + List hriList = new LinkedList(); + hriList.addAll(hris); + return hriList; + } + + private static class RegionInfoComparatorBasedOnStartKey implements Comparator { + @Override + public int compare(HRegionInfo l, HRegionInfo r) { + int compareTo = Bytes.compareTo(l.getStartKey(), r.getStartKey()); + if (compareTo < 0) + return -1; + if (compareTo > 0) + return 1; + return 0; + } + } + +} Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -348,8 +348,7 @@ /** The health check chore. */ private HealthCheckChore healthCheckChore; - - + /** * Initializes the HMaster. The steps are as follows: *

@@ -726,6 +725,9 @@ status.setStatus("Initializing ZK system trackers"); initializeZKBasedSystemTrackers(); + Set tableUnderCreation = this.assignmentManager.getCreatingTable(); + // Deletes tmp directory only if there are no tables in partially created state + this.fileSystemManager.checkTempDir(tableUnderCreation); if (!masterRecovery) { // initialize master side coprocessors before we start handling requests @@ -774,7 +776,7 @@ this.balancer.setMasterServices(this); // Fix up assignment manager status status.setStatus("Starting assignment manager"); - this.assignmentManager.joinCluster(); + this.assignmentManager.joinCluster(tableUnderCreation); this.balancer.setClusterStatus(getClusterStatus()); @@ -1537,7 +1539,7 @@ this.executorService.submit(new CreateTableHandler(this, this.fileSystemManager, hTableDescriptor, conf, - newRegions, this).prepare()); + newRegions, this, false, hTableDescriptor.getNameAsString()).prepare()); if (cpHost != null) { cpHost.postCreateTable(hTableDescriptor, newRegions); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -148,7 +148,7 @@ checkRootDir(this.rootdir, conf, this.fs); // check if temp directory exists and clean it - checkTempDir(this.tempdir, conf, this.fs); + //checkTempDir(this.tempdir, conf, this.fs); Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); @@ -452,25 +452,28 @@ /** * Make sure the hbase temp directory exists and is empty. * NOTE that this method is only executed once just after the master becomes the active one. + * @param creatingTable */ - private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs) + private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs, Set creatingTable) throws IOException { // If the temp directory exists, clear the content (left over, from the previous run) if (fs.exists(tmpdir)) { // Archive table in temp, maybe left over from failed deletion, // if not the cleaner will take care of them. for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) { - for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) { - HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + if (!creatingTable.contains(tabledir.getName())) { + for (Path regiondir : FSUtils.getRegionDirs(fs, tabledir)) { + HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + } } } - if (!fs.delete(tmpdir, true)) { + if (creatingTable.size() == 0 && !fs.delete(tmpdir, true)) { throw new IOException("Unable to clean the temp directory: " + tmpdir); } } // Create the temp directory - if (!fs.mkdirs(tmpdir)) { + if (creatingTable.size() == 0 && !fs.mkdirs(tmpdir)) { throw new IOException("HBase temp directory '" + tmpdir + "' creation failure."); } } @@ -648,4 +651,13 @@ this.services.getTableDescriptors().add(htd); return htd; } + + /** + * Deletes the temp directory if there are no tables in partially created state + * @param creatingTable + * @throws IOException + */ + public void checkTempDir(Set creatingTable) throws IOException { + checkTempDir(this.tempdir, conf, this.fs, creatingTable); + } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (working copy) @@ -67,7 +67,7 @@ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { super(masterServices, masterServices.getMasterFileSystem(), hTableDescriptor, - masterServices.getConfiguration(), null, masterServices); + masterServices.getConfiguration(), null, masterServices, false, hTableDescriptor.getNameAsString()); // Snapshot information this.snapshot = snapshot; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (working copy) @@ -405,7 +405,7 @@ public void fillRegion(final HRegion region) throws IOException { cloneRegion(region, snapshotRegions.get(region.getRegionInfo().getEncodedName())); } - }); + }, false); return clonedRegionsInfo; } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (revision 1463118) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (working copy) @@ -38,8 +38,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.backup.HFileArchiver; @@ -75,7 +77,7 @@ */ public static List createRegions(final Configuration conf, final Path rootDir, final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions) throws IOException { - return createRegions(conf, rootDir, hTableDescriptor, newRegions, null); + return createRegions(conf, rootDir, hTableDescriptor, newRegions, null, false); } /** @@ -91,7 +93,7 @@ */ public static List createRegions(final Configuration conf, final Path rootDir, final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions, - final RegionFillTask task) throws IOException { + final RegionFillTask task, boolean isTableCreation) throws IOException { if (newRegions == null) return null; int regionNumber = newRegions.length; ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool(conf, @@ -99,6 +101,15 @@ CompletionService completionService = new ExecutorCompletionService( regionOpenAndInitThreadPool); List regionInfos = new ArrayList(); + // Create a file that has the number of splitkeys that is given by the user. + // Do this only during table creation + // On failure check for this file first. If this file is found and the number of regioninfos does not match + // and it is in the tmp location we cannot recreate the table. + // But if the region infos matches then we can recreate the table even if it is in the tmp folder + if (isTableCreation) { + Path tableDir = HTableDescriptor.getTableDir(rootDir, hTableDescriptor.getName()); + writeSplitKeysCount(conf, tableDir, newRegions); + } for (final HRegionInfo newRegion : newRegions) { completionService.submit(new Callable() { public HRegionInfo call() throws IOException { @@ -137,6 +148,21 @@ return regionInfos; } + private static void writeSplitKeysCount(final Configuration conf, final Path rootDir, + final HRegionInfo[] newRegions) throws IOException { + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream splitKeyFile = null; + try { + splitKeyFile = fs.create(new Path(rootDir.toString() + Path.SEPARATOR + + HConstants.SPLIT_KEYS_FILE)); + splitKeyFile.writeInt(newRegions.length); + } finally { + if (splitKeyFile != null) { + splitKeyFile.close(); + } + } + } + /* * used by createRegions() to get the thread pool executor based on the * "hbase.hregion.open.and.init.threads.max" property. Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java (revision 1463118) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java (working copy) @@ -1,5 +1,4 @@ /** - * * Licensed to the Apache Software Foundation (ASF) under one * or more contributor license agreements. See the NOTICE file * distributed with this work for additional information @@ -18,9 +17,12 @@ */ package org.apache.hadoop.hbase.master.handler; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.IOException; +import java.io.InterruptedIOException; import java.util.List; import org.apache.commons.logging.Log; @@ -29,46 +31,68 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HBaseTestingUtility; import org.apache.hadoop.hbase.HColumnDescriptor; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; -import org.apache.hadoop.hbase.MediumTests; +import org.apache.hadoop.hbase.LargeTests; import org.apache.hadoop.hbase.MiniHBaseCluster; import org.apache.hadoop.hbase.Server; +import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.master.HMaster; import org.apache.hadoop.hbase.master.MasterFileSystem; import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.TestMaster; import org.apache.hadoop.hbase.util.Bytes; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKTable; -import org.apache.hadoop.hbase.zookeeper.ZKUtil; -import org.junit.AfterClass; -import org.junit.BeforeClass; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import org.junit.After; +import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; -@Category(MediumTests.class) +/** + * Tests the restart scenario of master while table is partially created + */ +@Category(LargeTests.class) public class TestCreateTableHandler { private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility(); private static final Log LOG = LogFactory.getLog(TestCreateTableHandler.class); - private static final byte[] TABLENAME = Bytes.toBytes("TestCreateTableHandler"); private static final byte[] FAMILYNAME = Bytes.toBytes("fam"); public static boolean throwException = false; - @BeforeClass - public static void setUp() throws Exception { - TEST_UTIL.startMiniCluster(1); + public static boolean creating_td = false; + + public static boolean created_regioninfo = false; + public static boolean moved_to_orig = false; + public static boolean added_to_meta = false; + public static boolean put_to_meta = false; + + @Before + public void setUp() throws Exception { + TEST_UTIL.startMiniCluster(1, 1); + creating_td = false; + throwException = false; + created_regioninfo = false; + moved_to_orig = false; + added_to_meta = false; + put_to_meta = false; + } - @AfterClass - public static void tearDown() throws Exception { + @After + public void tearDown() throws Exception { TEST_UTIL.shutdownMiniCluster(); + LOG.info("Completed shutdown"); } - @Test + @Test(timeout = 180000) public void testCreateTableHandlerIfCalledTwoTimesAndFirstOneIsUnderProgress() throws Exception { + byte[] tableName = Bytes + .toBytes("testCreateTableHandlerIfCalledTwoTimesAndFirstOneIsUnderProgress"); final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); final HMaster m = cluster.getMaster(); - final HTableDescriptor desc = new HTableDescriptor(TABLENAME); + final HTableDescriptor desc = new HTableDescriptor(tableName); desc.addFamily(new HColumnDescriptor(FAMILYNAME)); final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, null) }; @@ -83,19 +107,253 @@ handler1.prepare(); handler1.process(); for (int i = 0; i < 100; i++) { - if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { Thread.sleep(200); } } - assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(TABLENAME)); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName)); } + @Test(timeout = 180000) + public void testMasterRestartOnCreateTableAfterCreatingTD() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingTD"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + creating_td = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + abortAndStartNewMaster(cluster); + + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + private void abortAndStartNewMaster(final MiniHBaseCluster cluster) throws IOException { + cluster.abortMaster(0); + cluster.waitOnMaster(0); + LOG.info("Starting new master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + } + + @Test(timeout = 180000) + public void testMasterRestartOnCreateTableAfterCreatingRegionInfo() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingRegionInfo"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + created_regioninfo = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + abortAndStartNewMaster(cluster); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + + blockTillNoRIT(cluster); + + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + @Test(timeout = 180000) + public void testMasterRestartOnCreateTableAfterCreatingAddedToMeta() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableAfterCreatingAddedToMeta"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + added_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + abortAndStartNewMaster(cluster); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + + blockTillNoRIT(cluster); + + assertTrue(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + } + + @Test(timeout = 180000) + public void testMasterRestartOnCreateTableAfterCreatingAddedToMetaWithMoreSplitKeys() + throws Exception { + byte[] tableName = Bytes + .toBytes("testMasterRestartOnCreateTableAfterCreatingAddedToMetaWithMoreSplitKeys"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = createHRegionInfos(desc); + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + added_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + byte[][] splitKeys = createSplitKeys(); + + abortAndStartNewMaster(cluster); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName, splitKeys)) { + Thread.sleep(20); + } + } + + blockTillNoRIT(cluster); + + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(4, regionCount); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName)); + } + + @Test(timeout = 180000) + public void testMasterRestartOnCreateTableWhenAddingToMetaFails() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartOnCreateTableWhenAddingToMetaFails"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = createHRegionInfos(desc); + + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + put_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + byte[][] splitKeys = createSplitKeys(); + + abortAndStartNewMaster(cluster); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName, splitKeys)) { + Thread.sleep(20); + } + } + + blockTillNoRIT(cluster); + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(4, regionCount); + assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(tableName)); + } + + private HRegionInfo[] createHRegionInfos(final HTableDescriptor desc) { + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { + new HRegionInfo(desc.getName(), HConstants.EMPTY_BYTE_ARRAY, Bytes.toBytes("a")), + new HRegionInfo(desc.getName(), Bytes.toBytes("a"), Bytes.toBytes("b")), + new HRegionInfo(desc.getName(), Bytes.toBytes("b"), Bytes.toBytes("c")), + new HRegionInfo(desc.getName(), Bytes.toBytes("c"), HConstants.EMPTY_BYTE_ARRAY) }; + return hRegionInfos; + } + + private byte[][] createSplitKeys() { + byte[][] splitKeys = new byte[3][]; + splitKeys[0] = Bytes.toBytes("a"); + splitKeys[0] = Bytes.toBytes("b"); + splitKeys[0] = Bytes.toBytes("c"); + return splitKeys; + } + + @Test(timeout = 180000) + public void testMasterRestartAfterMovedToOrigLocation() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartAfterMovedToOrigLocation"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo(desc.getName(), null, + null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + moved_to_orig = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + abortAndStartNewMaster(cluster); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)) { + Thread.sleep(20); + } + } + + // Failover should be completed, now wait for no RIT + blockTillNoRIT(cluster); + + assertTrue(TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName)); + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(1, regionCount); + } + + private void blockTillNoRIT(final MiniHBaseCluster cluster) throws KeeperException, + InterruptedException { + LOG.info("Waiting for no more RIT"); + ZKAssign.blockUntilNoRIT(cluster.getMaster().getZooKeeper()); + LOG.info("No more RIT in ZK, now doing final test verification"); + } + + @Test(timeout = 180000) + public void testMasterRestartAfterMovedToOrigLocationWithMoreSplitKeys() throws Exception { + byte[] tableName = Bytes.toBytes("testMasterRestartAfterMovedToOrigLocationWithMoreSplitKeys"); + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(tableName); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = createHRegionInfos(desc); + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, m.getMasterFileSystem(), + desc, cluster.getConfiguration(), hRegionInfos, m); + moved_to_orig = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + byte[][] splitKeys = createSplitKeys(); + + abortAndStartNewMaster(cluster); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(tableName, splitKeys)) { + Thread.sleep(20); + } + } + + blockTillNoRIT(cluster); + int regionCount = MetaReader.getRegionCount(cluster.getConf(), Bytes.toString(tableName)); + assertEquals(4, regionCount); + } + private static class CustomCreateTableHandler extends CreateTableHandler { public CustomCreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo[] newRegions, MasterServices masterServices) { - super(server, fileSystemManager, hTableDescriptor, conf, newRegions, masterServices); + super(server, fileSystemManager, hTableDescriptor, conf, newRegions, masterServices, false, + hTableDescriptor.getNameAsString()); } @Override @@ -106,5 +364,55 @@ } return super.handleCreateHdfsRegions(tableRootDir, tableName); } + + @Override + void setCreatedTD(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + if (creating_td) { + throw new IOException("Failure on Set creating table descriptor"); + } else { + super.setCreatedTD(tableName, zkTable); + } + } + + @Override + void setMoveToOrigLocation(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + if (moved_to_orig) { + throw new IOException("Failure on Set Moving to original location"); + } else { + super.setMoveToOrigLocation(tableName, zkTable); + } + } + + @Override + void addToMetaAndBulkAssign(String tableName, ZKTable zkTable, List regionInfos) + throws IOException, NoNodeException, KeeperException, InterruptedIOException { + if (put_to_meta) { + throw new IOException("Failure on doing put to META."); + } else { + super.addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + } + } + + @Override + void setCreatedRegionInfo(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + if (created_regioninfo) { + throw new IOException("Failure on Set Creating region info"); + } else { + super.setCreatedRegionInfo(tableName, zkTable); + } + } + + @Override + void setAddedToMeta(String tableName, ZKTable zkTable) throws NoNodeException, KeeperException, + IOException { + if (added_to_meta) { + throw new IOException("Failure on adding to meta"); + } else { + super.setAddedToMeta(tableName, zkTable); + } + } } } Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (revision 1463118) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java (working copy) @@ -26,6 +26,7 @@ import java.io.IOException; import java.util.ArrayList; import java.util.HashMap; +import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.concurrent.atomic.AtomicBoolean; @@ -874,7 +875,7 @@ try{ // set table in disabling state. am.getZKTable().setDisablingTable(REGIONINFO.getTableNameAsString()); - am.joinCluster(); + am.joinCluster(new HashSet()); // should not call retainAssignment if we get empty regions in assignAllUserRegions. assertFalse( "Assign should not be invoked for disabling table regions during clean cluster startup.", @@ -1174,7 +1175,7 @@ am.getRegionStates().regionsInTransition.clear(); am.regionPlans.clear(); try { - am.joinCluster(); + am.joinCluster(new HashSet()); } catch (IOException e) { throw new RuntimeException(e); } catch (KeeperException e) {