Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (revision 1461042) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (working copy) @@ -21,10 +21,16 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import com.google.protobuf.InvalidProtocolBufferException; + import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -58,6 +64,7 @@ */ private final Map cache = new HashMap(); + private byte[] data; // TODO: Make it so always a table znode. Put table schema here as well as table state. // Have watcher on table znode so all are notified of state or schema change. 
@@ -79,6 +86,12 @@ if (children == null) return; for (String child: children) { ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(this.watcher, child); + if (state == ZooKeeperProtos.Table.State.CREATING) { + List listChildrenNoWatch = ZKUtil.listChildrenNoWatch(this.watcher, ZKUtil.joinZNode(this.watcher.tableZNode, child)); + for (String statusChild : listChildrenNoWatch) { + LOG.debug("Status node should be present: " + statusChild); + } + } if (state != null) this.cache.put(child, state); } } @@ -134,7 +147,98 @@ setTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } } + + public void createTableCreationStatusNode(final String tableName) throws KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + if (ZKUtil.checkExists(this.watcher, tableStatusZNode) == -1) { + ZKUtil.createAndFailSilent(this.watcher, tableStatusZNode); + } + String tableZNode = ZKUtil.joinZNode(this.watcher.tableZNode, tableName); + int numberOfChildren = ZKUtil.getNumberOfChildren(this.watcher, tableZNode); + LOG.info("Number of children under " + tableZNode + ": " + numberOfChildren); + } + + public void cleanUpTableStatusNode(final String tableName) throws KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + ZKUtil.deleteNodeFailSilent(this.watcher, tableStatusZNode); + } + + + /** + * If the table is found in CREATING_TABLE state the in-memory state is + * removed.
This helps in cases where CreateTable is to be retried by the + * client incase of failures + * + * @param tableName + */ + public void removeCreatingTable(final String tableName) { + synchronized (this.cache) { + if (isCreatingTable(tableName)) { + this.cache.remove(tableName); + } + } + } + + public static Set getCreatingTables(ZooKeeperWatcher zkw) throws KeeperException { + return getAllTables(zkw, ZooKeeperProtos.Table.State.CREATING); + } + + + public TableCreationStatus.State getCurrentTableCreationState(String tableName) throws KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + byte[] data = ZKUtil.getData(this.watcher, tableStatusZNode); + try { + ProtobufUtil.expectPBMagicPrefix(data); + ZooKeeperProtos.TableCreationStatus.Builder builder = ZooKeeperProtos.TableCreationStatus.newBuilder(); + int magicLen = ProtobufUtil.lengthOfPBMagic(); + ZooKeeperProtos.TableCreationStatus t = builder.mergeFrom(data, magicLen, data.length - magicLen).build(); + return t.getState(); + } catch (InvalidProtocolBufferException e) { + KeeperException ke = new KeeperException.DataInconsistencyException(); + ke.initCause(e); + throw ke; + } catch (DeserializationException e) { + throw ZKUtil.convert(e); + } + } + + public void setStatusCreatedTableDescriptor(final String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.CREATED_TD); + } + + public void setStatusCreatedRegionInfo(final String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.CREATED_REGIONINFO); + } + + public void setStatusMovedToOriginalLocation(final String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.MOVED_TO_ORIG_LOCATION); + } + + public void setStatusToAddedToMeta(final String tableName) throws NoNodeException, + KeeperException { + 
setStatusOfTableCreationNode(tableName, TableCreationStatus.State.ADDED_TO_META); + } + + private void setStatusOfTableCreationNode(final String tableName, final TableCreationStatus.State state) + throws NoNodeException, KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + ZooKeeperProtos.TableCreationStatus.Builder builder = ZooKeeperProtos.TableCreationStatus + .newBuilder(); + builder.setState(state); + byte[] data = ProtobufUtil.prependPBMagic(builder.build().toByteArray()); + ZKUtil.setData(this.watcher, tableStatusZNode, data); + } + + private String pathOfTableStatusZNode(final String tableName) { + String tableZNode = ZKUtil.joinZNode(this.watcher.tableZNode, tableName); + String tableStatusZNode = ZKUtil.joinZNode(tableZNode, this.watcher.tableCreationStatusNode); + return tableStatusZNode; + } + /** * Sets the specified table as ENABLING in zookeeper atomically * If the table is already in ENABLING state, no operation is performed @@ -152,6 +256,24 @@ return true; } } + + /** + * Sets the specified table as CREATING in zookeeper atomically. + * If the table is already in CREATING state, no operation is performed + * @param tableName + * @return if the operation succeeds or not + * @throws KeeperException unexpected zookeeper exception + */ + public boolean checkAndSetCreatingTable(final String tableName) + throws KeeperException { + synchronized (this.cache) { + if (isCreatingTable(tableName)) { + return false; + } + setTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + return true; + } + } /** * Sets the specified table as ENABLING in zookeeper atomically @@ -215,6 +337,10 @@ public boolean isEnablingTable(final String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } + + public boolean isCreatingTable(final String tableName) { + return isTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + } public boolean isEnabledTable(String tableName) { return
isTableState(tableName, ZooKeeperProtos.Table.State.ENABLED); @@ -374,6 +500,7 @@ ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(zkw, child); for (ZooKeeperProtos.Table.State expectedState: states) { if (state == expectedState) { + LOG.debug("The table "+child+" found in "+state+" state"); allTables.add(child); break; } @@ -381,4 +508,5 @@ } return allTables; } + } Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 1461042) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy) @@ -105,6 +105,8 @@ public String balancerZNode; // znode containing the lock for the tables public String tableLockZNode; + // znode that has the current state of the create table + public String tableCreationStatusNode = "status"; // Certain ZooKeeper nodes need to be world-readable public static final ArrayList CREATOR_ALL_AND_WORLD_READABLE = @@ -220,6 +222,9 @@ conf.get("zookeeper.znode.balancer", "balancer")); tableLockZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.tableLock", "table-lock")); + + tableCreationStatusNode = + conf.get("zookeeper.znode.tableCreationStatus", "status"); } /** Index: hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java =================================================================== --- hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (revision 1461042) +++ hbase-common/src/main/java/org/apache/hadoop/hbase/HConstants.java (working copy) @@ -324,6 +324,9 @@ /** Default value for cluster ID */ public static final String CLUSTER_ID_DEFAULT = "default-cluster"; + + /** Name of the file storing the split keys of a table **/ + public static final String SPLIT_KEYS_FILE = "splitKeys"; // Always store the location of the root table's HRegion.
// This HRegion is never split. Index: hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java =================================================================== --- hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (revision 1461042) +++ hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (working copy) @@ -2812,12 +2812,14 @@ DISABLED(1, 1), DISABLING(2, 2), ENABLING(3, 3), + CREATING(4, 4), ; public static final int ENABLED_VALUE = 0; public static final int DISABLED_VALUE = 1; public static final int DISABLING_VALUE = 2; public static final int ENABLING_VALUE = 3; + public static final int CREATING_VALUE = 4; public final int getNumber() { return value; } @@ -2828,6 +2830,7 @@ case 1: return DISABLED; case 2: return DISABLING; case 3: return ENABLING; + case 4: return CREATING; default: return null; } } @@ -2858,7 +2861,7 @@ } private static final State[] VALUES = { - ENABLED, DISABLED, DISABLING, ENABLING, + ENABLED, DISABLED, DISABLING, ENABLING, CREATING, }; public static State valueOf( @@ -3234,6 +3237,469 @@ // @@protoc_insertion_point(class_scope:Table) } + public interface TableCreationStatusOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required .TableCreationStatus.State state = 1 [default = CREATED_TD]; + boolean hasState(); + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State getState(); + } + public static final class TableCreationStatus extends + com.google.protobuf.GeneratedMessage + implements TableCreationStatusOrBuilder { + // Use TableCreationStatus.newBuilder() to construct. 
+ private TableCreationStatus(Builder builder) { + super(builder); + } + private TableCreationStatus(boolean noInit) {} + + private static final TableCreationStatus defaultInstance; + public static TableCreationStatus getDefaultInstance() { + return defaultInstance; + } + + public TableCreationStatus getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_fieldAccessorTable; + } + + public enum State + implements com.google.protobuf.ProtocolMessageEnum { + CREATED_TD(0, 0), + CREATED_REGIONINFO(1, 1), + MOVED_TO_ORIG_LOCATION(2, 2), + ADDED_TO_META(3, 3), + ; + + public static final int CREATED_TD_VALUE = 0; + public static final int CREATED_REGIONINFO_VALUE = 1; + public static final int MOVED_TO_ORIG_LOCATION_VALUE = 2; + public static final int ADDED_TO_META_VALUE = 3; + + + public final int getNumber() { return value; } + + public static State valueOf(int value) { + switch (value) { + case 0: return CREATED_TD; + case 1: return CREATED_REGIONINFO; + case 2: return MOVED_TO_ORIG_LOCATION; + case 3: return ADDED_TO_META; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public State findValueByNumber(int number) { + return State.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final 
com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDescriptor().getEnumTypes().get(0); + } + + private static final State[] VALUES = { + CREATED_TD, CREATED_REGIONINFO, MOVED_TO_ORIG_LOCATION, ADDED_TO_META, + }; + + public static State valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private State(int index, int value) { + this.index = index; + this.value = value; + } + + // @@protoc_insertion_point(enum_scope:TableCreationStatus.State) + } + + private int bitField0_; + // required .TableCreationStatus.State state = 1 [default = CREATED_TD]; + public static final int STATE_FIELD_NUMBER = 1; + private org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State state_; + public boolean hasState() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State getState() { + return state_; + } + + private void initFields() { + state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasState()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if 
(((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeEnum(1, state_.getNumber()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(1, state_.getNumber()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus other = (org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus) obj; + + boolean result = true; + result = result && (hasState() == other.hasState()); + if (hasState()) { + result = result && + (getState() == other.getState()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + @java.lang.Override + public int hashCode() { + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasState()) { + hash = (37 * hash) + STATE_FIELD_NUMBER; + hash = (53 * hash) + hashEnum(getState()); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return 
newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite 
extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatusOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_fieldAccessorTable; + } + + // Construct using 
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDescriptor(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus build() { + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus 
buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus result = new org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.state_ = state_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDefaultInstance()) return this; + if (other.hasState()) { + setState(other.getState()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasState()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 8: { + int rawValue = input.readEnum(); + 
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State value = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(1, rawValue); + } else { + bitField0_ |= 0x00000001; + state_ = value; + } + break; + } + } + } + } + + private int bitField0_; + + // required .TableCreationStatus.State state = 1 [default = CREATED_TD]; + private org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + public boolean hasState() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State getState() { + return state_; + } + public Builder setState(org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + state_ = value; + onChanged(); + return this; + } + public Builder clearState() { + bitField0_ = (bitField0_ & ~0x00000001); + state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:TableCreationStatus) + } + + static { + defaultInstance = new TableCreationStatus(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:TableCreationStatus) + } + public interface ReplicationPeerOrBuilder extends com.google.protobuf.MessageOrBuilder { @@ -5706,6 +6172,11 @@ com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_Table_fieldAccessorTable; private static com.google.protobuf.Descriptors.Descriptor + internal_static_TableCreationStatus_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + 
internal_static_TableCreationStatus_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor internal_static_ReplicationPeer_descriptor; private static com.google.protobuf.GeneratedMessage.FieldAccessorTable @@ -5750,19 +6221,24 @@ "plitLogTask.State\022\037\n\nserverName\030\002 \002(\0132\013." + "ServerName\"C\n\005State\022\016\n\nUNASSIGNED\020\000\022\t\n\005O", "WNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n\004DONE\020\003\022\007\n\003ERR\020\004\"" + - "n\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007E" + - "NABLED\"?\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED" + - "\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\"%\n\017Repli" + - "cationPeer\022\022\n\nclusterkey\030\001 \002(\t\"^\n\020Replic" + - "ationState\022&\n\005state\030\001 \002(\0162\027.ReplicationS" + - "tate.State\"\"\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISA" + - "BLED\020\001\"+\n\027ReplicationHLogPosition\022\020\n\010pos" + - "ition\030\001 \002(\003\"$\n\017ReplicationLock\022\021\n\tlockOw" + - "ner\030\001 \002(\t\"s\n\tTableLock\022\021\n\ttableName\030\001 \001(", - "\014\022\036\n\tlockOwner\030\002 \001(\0132\013.ServerName\022\020\n\010thr" + - "eadId\030\003 \001(\003\022\020\n\010isShared\030\004 \001(\010\022\017\n\007purpose" + - "\030\005 \001(\tBE\n*org.apache.hadoop.hbase.protob" + - "uf.generatedB\017ZooKeeperProtosH\001\210\001\001\240\001\001" + "|\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007E" + + "NABLED\"M\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED" + + "\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\022\014\n\010CREAT" + + "ING\020\004\"\254\001\n\023TableCreationStatus\0225\n\005state\030\001" + + " \002(\0162\032.TableCreationStatus.State:\nCREATE" + + "D_TD\"^\n\005State\022\016\n\nCREATED_TD\020\000\022\026\n\022CREATED" + + "_REGIONINFO\020\001\022\032\n\026MOVED_TO_ORIG_LOCATION\020" + + 
"\002\022\021\n\rADDED_TO_META\020\003\"%\n\017ReplicationPeer\022" + + "\022\n\nclusterkey\030\001 \002(\t\"^\n\020ReplicationState\022", + "&\n\005state\030\001 \002(\0162\027.ReplicationState.State\"" + + "\"\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\"+\n\027R" + + "eplicationHLogPosition\022\020\n\010position\030\001 \002(\003" + + "\"$\n\017ReplicationLock\022\021\n\tlockOwner\030\001 \002(\t\"s" + + "\n\tTableLock\022\021\n\ttableName\030\001 \001(\014\022\036\n\tlockOw" + + "ner\030\002 \001(\0132\013.ServerName\022\020\n\010threadId\030\003 \001(\003" + + "\022\020\n\010isShared\030\004 \001(\010\022\017\n\007purpose\030\005 \001(\tBE\n*o" + + "rg.apache.hadoop.hbase.protobuf.generate" + + "dB\017ZooKeeperProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -5817,8 +6293,16 @@ new java.lang.String[] { "State", }, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.Builder.class); + internal_static_TableCreationStatus_descriptor = + getDescriptor().getMessageTypes().get(6); + internal_static_TableCreationStatus_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_TableCreationStatus_descriptor, + new java.lang.String[] { "State", }, + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.class, + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.Builder.class); internal_static_ReplicationPeer_descriptor = - getDescriptor().getMessageTypes().get(6); + getDescriptor().getMessageTypes().get(7); internal_static_ReplicationPeer_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationPeer_descriptor, @@ -5826,7 +6310,7 @@ 
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationPeer.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationPeer.Builder.class); internal_static_ReplicationState_descriptor = - getDescriptor().getMessageTypes().get(7); + getDescriptor().getMessageTypes().get(8); internal_static_ReplicationState_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationState_descriptor, @@ -5834,7 +6318,7 @@ org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationState.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationState.Builder.class); internal_static_ReplicationHLogPosition_descriptor = - getDescriptor().getMessageTypes().get(8); + getDescriptor().getMessageTypes().get(9); internal_static_ReplicationHLogPosition_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationHLogPosition_descriptor, @@ -5842,7 +6326,7 @@ org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationHLogPosition.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationHLogPosition.Builder.class); internal_static_ReplicationLock_descriptor = - getDescriptor().getMessageTypes().get(9); + getDescriptor().getMessageTypes().get(10); internal_static_ReplicationLock_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationLock_descriptor, @@ -5850,7 +6334,7 @@ org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationLock.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationLock.Builder.class); internal_static_TableLock_descriptor = - getDescriptor().getMessageTypes().get(10); + getDescriptor().getMessageTypes().get(11); internal_static_TableLock_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_TableLock_descriptor, Index: 
hbase-protocol/src/main/protobuf/ZooKeeper.proto =================================================================== --- hbase-protocol/src/main/protobuf/ZooKeeper.proto (revision 1461042) +++ hbase-protocol/src/main/protobuf/ZooKeeper.proto (working copy) @@ -93,12 +93,26 @@ DISABLED = 1; DISABLING = 2; ENABLING = 3; + CREATING = 4; } // This is the table's state. If no znode for a table, // its state is presumed enabled. See o.a.h.h.zookeeper.ZKTable class // for more. required State state = 1 [default = ENABLED]; } +message TableCreationStatus { + // Table's current state + enum State { + CREATED_TD = 0; + CREATED_REGIONINFO = 1; + MOVED_TO_ORIG_LOCATION = 2; + ADDED_TO_META = 3; + } + // This is the table's state. If no znode for a table, + // its state is presumed enabled. See o.a.h.h.zookeeper.ZKTable class + // for more. + required State state = 1 [default = CREATED_TD]; +} /** * Used by replication. Holds a replication peer key. Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; @@ -54,15 +55,18 @@ import org.apache.hadoop.hbase.catalog.MetaReader; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.exceptions.DeserializationException; +import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.exceptions.NotServingRegionException; import org.apache.hadoop.hbase.exceptions.RegionAlreadyInTransitionException; import org.apache.hadoop.hbase.exceptions.RegionServerStoppedException; import 
org.apache.hadoop.hbase.exceptions.ServerNotRunningYetException; +import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.exceptions.TableNotFoundException; import org.apache.hadoop.hbase.executor.EventHandler; import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; +import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.MergedRegionHandler; @@ -187,6 +191,16 @@ private final boolean tomActivated; /** + * A map to track the count a region fails to open in a row. + * So that we don't try to open a region forever if the failure is + * unrecoverable. We don't put this information in region states + * because we don't expect this to happen frequently; we don't + * want to copy this information over during each state transition either. + */ + private final ConcurrentHashMap + failedOpenTracker = new ConcurrentHashMap(); + + /** * Constructs a new assignment manager. * * @param server @@ -369,7 +383,9 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions - Map> deadServers = rebuildUserRegions(); + Pair>, Map>> recoveryInfo + = rebuildUserRegions(); + Map> deadServers = recoveryInfo.getFirst(); // This method will assign all user regions if a clean server startup or // it will reconstruct master state and cleanup any leftovers from @@ -378,6 +394,8 @@ recoverTableInDisablingState(); recoverTableInEnablingState(); + + recoverTableInCreation(recoveryInfo.getSecond()); } /** @@ -880,9 +898,25 @@ // When there are more than one region server a new RS is selected as the // destination and the same is updated in the regionplan. 
(HBASE-5546) if (regionState != null) { - getRegionPlan(regionState.getRegion(), sn, true); - this.executorService.submit(new ClosedRegionHandler(server, - this, regionState.getRegion())); + AtomicInteger failedOpenCount = failedOpenTracker.get(encodedName); + if (failedOpenCount == null) { + failedOpenCount = new AtomicInteger(); + // No need to use putIfAbsent, or extra synchronization since + // this whole handleRegion block is locked on the encoded region + // name, and failedOpenTracker is updated only in this block + failedOpenTracker.put(encodedName, failedOpenCount); + } + if (failedOpenCount.incrementAndGet() >= maximumAttempts) { + regionStates.updateRegionState( + regionState.getRegion(), RegionState.State.FAILED_OPEN); + // remove the tracking info to save memory, also reset + // the count for next open initiative + failedOpenTracker.remove(encodedName); + } else { + getRegionPlan(regionState.getRegion(), sn, true); + this.executorService.submit(new ClosedRegionHandler(server, + this, regionState.getRegion())); + } } break; @@ -914,6 +948,7 @@ // Handle OPENED by removing from transition and deleted zk node regionState = regionStates.updateRegionState(rt, RegionState.State.OPEN); if (regionState != null) { + failedOpenTracker.remove(encodedName); // reset the count, if any this.executorService.submit(new OpenedRegionHandler( server, this, regionState.getRegion(), sn, expectedVersion)); } @@ -1151,6 +1186,8 @@ } } }); + }else { + System.out.println("Node deleted "+path); } } @@ -1718,13 +1755,16 @@ } if (setOfflineInZK && versionOfOfflineNode == -1) { LOG.warn("Unable to set offline in ZooKeeper to assign " + region); - if (!tomActivated) { - regionStates.updateRegionState(region, RegionState.State.FAILED_OPEN); + // Setting offline in ZK must have been failed due to ZK racing or some + // exception which may make the server to abort. 
If it is ZK racing, + // we should retry since we already reset the region state, + // existing (re)assignment will fail anyway. + if (!server.isAborted()) { + continue; } - return; } - if (this.server.isStopped()) { - LOG.debug("Server stopped; skipping assign of " + region); + if (this.server.isStopped() || this.server.isAborted()) { + LOG.debug("Server stopped/aborted; skipping assign of " + region); return; } LOG.info("Assigning region " + region.getRegionNameAsString() + @@ -2408,7 +2448,9 @@ * in META * @throws IOException */ - Map> rebuildUserRegions() throws IOException, KeeperException { + Pair>, Map>> rebuildUserRegions() + throws IOException, KeeperException { + Set creatingTables = ZKTable.getCreatingTables(watcher); Set enablingTables = ZKTable.getEnablingTables(watcher); Set disabledOrEnablingTables = ZKTable.getDisabledTables(watcher); disabledOrEnablingTables.addAll(enablingTables); @@ -2419,9 +2461,12 @@ List results = MetaReader.fullScan(this.catalogTracker); // Get any new but slow to checkin region server that joined the cluster Set onlineServers = serverManager.getOnlineServers().keySet(); + Pair>, Map>> pair = + new Pair>, Map>>(); // Map of offline servers and their regions to be returned Map> offlineServers = new TreeMap>(); + Map> hriOfCreatingTables = new HashMap>(); // Iterate regions in META for (Result result : results) { Pair region = HRegionInfo.getHRegionInfoAndServerName(result); @@ -2447,6 +2492,14 @@ " has null regionLocation." 
+ " But its table " + tableName + " isn't in ENABLING state."); } + // If the table is in CREATING_TABLE state and if the regionLocation is null + // it means they are not yet assigned while table creation + + // Collect the hris for the regions of the table that is under creation + if (creatingTables.contains(tableName)) { + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, true); + } + } else if (!onlineServers.contains(regionLocation)) { // Region is located on a server that isn't online List offlineRegions = offlineServers.get(regionLocation); @@ -2455,11 +2508,14 @@ offlineServers.put(regionLocation, offlineRegions); } offlineRegions.add(regionInfo); + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, false); // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!creatingTables.contains(tableName)) { + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } else { // If region is in offline and split state check the ZKNode @@ -2477,20 +2533,54 @@ } // Region is being served and on an active server // add only if region not in disabled or enabling table + // Even if in CreatingTable state we can make the region as online + // Ensure we take care while recovery if (!disabledOrEnablingTables.contains(tableName)) { regionStates.regionOnline(regionInfo, regionLocation); + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, false); } // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!creatingTables.contains(tableName)) { + if 
(!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } } - return offlineServers; + pair.setFirst(offlineServers); + if(creatingTables.size() != 0 && hriOfCreatingTables.size() == 0){ + // There are some tables that is partially created and not added to the META + for (String tableName : creatingTables) { + LOG.debug("The table "+ tableName+ " found in CREATINGTABLE"); + hriOfCreatingTables.put(tableName, null); + } + } + pair.setSecond(hriOfCreatingTables); + return pair; } + + public Set getCreatingTable() throws KeeperException { + return ZKTable.getCreatingTables(this.watcher); + } + private void populateCreatingTableHRIs( + Map> hriOfCreatingTables, HRegionInfo regionInfo, + String tableName, boolean process) { + if (hriOfCreatingTables.get(tableName) == null) { + Map infoDetails = new HashMap(); + infoDetails.put(regionInfo, process); + LOG.debug("The region " + regionInfo + " is part of partially created table" + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + hriOfCreatingTables.put(tableName, infoDetails); + } else { + LOG.debug("The region " + regionInfo + " is part of partially created table" + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + hriOfCreatingTables.get(tableName).put(regionInfo, process); + } + } + /** * Recover the tables that were not fully moved to DISABLED state. These * tables are in DISABLING state when the master restarted/switched. 
@@ -2538,6 +2628,39 @@ } } } + + private void recoverTableInCreation(Map> hriOfCreatingTable) + throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { + if (hriOfCreatingTable != null) { + Iterator>> iterator = hriOfCreatingTable.entrySet() + .iterator(); + HRegionInfo[] newRegions = null; + while (iterator.hasNext()) { + // Recover by calling CreateTableHandler + Entry> details = iterator.next(); + String tableName = details.getKey(); + Map hriDetails = details.getValue(); + if (hriDetails != null) { + Iterator> hriIterator = hriDetails + .entrySet().iterator(); + newRegions = new HRegionInfo[hriDetails.size()]; + int count = 0; + while (hriIterator.hasNext()) { + Entry hri = hriIterator.next(); + if (hri.getValue() == true) { + newRegions[count] = hri.getKey(); + count++; + } + } + } + LOG.info("The table " + tableName + + " is in CREATINGTABLE state. Hence trying to create the table"); + // Note that the table descriptor is passed as null + new CreateTableHandler(this.server, ((HMaster) this.server).getMasterFileSystem(), null, + server.getConfiguration(), newRegions, (MasterServices) this.server, true, tableName).prepare().process(); + } + } + } /** * Processes list of dead servers from result of META scan and regions in RIT Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy) @@ -20,12 +20,15 @@ import java.io.IOException; import java.io.InterruptedIOException; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; import 
org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hbase.HRegionInfo; @@ -45,10 +48,17 @@ import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.master.TableLockManager.TableLock; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import com.google.common.collect.Lists; + /** * Handler to create a table. */ @@ -63,21 +73,28 @@ private final TableLockManager tableLockManager; private final HRegionInfo [] newRegions; private final TableLock tableLock; + private final boolean isRecovery; + private final String tableName; public CreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo [] newRegions, - MasterServices masterServices) { + MasterServices masterServices, boolean isRecovery, String tableName) { super(server, EventType.C_M_CREATE_TABLE); this.fileSystemManager = fileSystemManager; + // This can be null when the constructor is called from the recovery flow. this.hTableDescriptor = hTableDescriptor; this.conf = conf; this.newRegions = newRegions; this.catalogTracker = masterServices.getCatalogTracker(); this.assignmentManager = masterServices.getAssignmentManager(); this.tableLockManager = masterServices.getTableLockManager(); + this.isRecovery = isRecovery; + this.tableName = (this.hTableDescriptor == null) ? 
tableName : this.hTableDescriptor + .getNameAsString(); - this.tableLock = this.tableLockManager.writeLock(this.hTableDescriptor.getName() + byte[] name = ((this.hTableDescriptor == null)? Bytes.toBytes(tableName):this.hTableDescriptor.getName()); + this.tableLock = this.tableLockManager.writeLock(name , EventType.C_M_CREATE_TABLE.toString()); } @@ -100,26 +117,42 @@ this.tableLock.acquire(); boolean success = false; try { - String tableName = this.hTableDescriptor.getNameAsString(); - if (MetaReader.tableExists(catalogTracker, tableName)) { - throw new TableExistsException(tableName); + String tableName = this.tableName; + if (!isRecovery) { + if (MetaReader.tableExists(catalogTracker, tableName)) { + throw new TableExistsException(tableName); + } } // If we have multiple client threads trying to create the table at the // same time, given the async nature of the operation, the table // could be in a state where .META. table hasn't been updated yet in // the process() function. - // Use enabling state to tell if there is already a request for the same + // Use CREATINGTABLE state to tell if there is already a request for the same // table in progress. This will introduce a new zookeeper call. Given // createTable isn't a frequent operation, that should be ok. //TODO: now that we have table locks, re-evaluate above try { - if (!this.assignmentManager.getZKTable().checkAndSetEnablingTable(tableName)) { + if (!this.assignmentManager.getZKTable().checkAndSetCreatingTable(tableName)) { throw new TableExistsException(tableName); } + // Create the status znode under the /hbase/table/tablename znode + // If this creation fails we can throw IOException. If the master + // aborts just after this + // then we can just clean up the znode on master restart. 
Client will + // anyways recreate the table + // as isTableAvailable() will always be false + + if (!this.isRecovery) { + this.assignmentManager.getZKTable().createTableCreationStatusNode(tableName); + } else { + LOG.debug("The table " + + tableName + + " is partially created and the new master is trying to complete the table creation."); + } } catch (KeeperException e) { - throw new IOException("Unable to ensure that the table will be" + - " enabling because of a ZooKeeper issue", e); + throw new IOException("Unable to ensure that the table will be" + + " enabling because of a ZooKeeper issue", e); } success = true; } finally { @@ -130,6 +163,140 @@ return this; } + private void handlePartiallyCreatedTable(String tableName) throws IOException { + ZKTable zkTable = this.assignmentManager.getZKTable(); + try { + TableCreationStatus.State currentTableCreationState = zkTable + .getCurrentTableCreationState(tableName); + Path tempDir = null; + FileSystem fs = null; + Path rootDir = null; + Path tableDir = null; + if (currentTableCreationState == null) { + LOG.error("The table " + tableName + " could not be recovered. Try recreating the table."); + return; + } + RegionInfoExtractor infoExtractor = null; + List regionInfos = null; + List currentHRegions = null; + switch (currentTableCreationState) { + case CREATED_TD: + // Just clear the table znode and tableStatus znode. Upto the client + // to recreate the table + // I think handling this case would be difficult without knowing the + // split keys + LOG.error("The table " + tableName + + " could not be recovered. Try recreating the table."); + break; + case CREATED_REGIONINFO: + // Try forming the region infos from the tmp directory. 
Try creating + // the table once again + tempDir = this.fileSystemManager.getTempDir(); + rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + + Path tempTableDir = new Path(tempDir, tableName); + FileStatus[] listStatus = fs.listStatus(tempTableDir); + if (listStatus != null) { + infoExtractor = new RegionInfoExtractor(tempTableDir, fs, tableName); + // Need to extract the table info here. The tableInfo has to be + // moved to the regiondir + // before we create the regions + regionInfos = infoExtractor.collectRegionInfos(true); + + } + if (regionInfos.size() == 0) { + LOG.error("The table " + tableName + + " could not be recovered. Try recreating the table."); + break; + } + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + // Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + setMoveToOrigLocation(tableName, zkTable); + tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + // Move Table temp directory to the hbase root location + if (!fs.rename(tempTableDir, tableDir)) { + throw new IOException("Unable to move table from temp=" + tempTableDir + + " to hbase root=" + tableDir); + } + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + setEnabledState(tableName); + + break; + case MOVED_TO_ORIG_LOCATION: + rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + try { + // Try forming the region infos from the table directory. 
Try + // creating the table once again + tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + infoExtractor = new RegionInfoExtractor(tableDir, fs, tableName); + regionInfos = infoExtractor.collectRegionInfos(false); + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + setEnabledState(tableName); + + } catch (IOException ioe) { + LOG.error("Cannot obtain the tabledescriptor from the tempdir for the table " + + tableName); + } + break; + case ADDED_TO_META: + // Do the assignment of all the regions added to META + try { + regionInfos = MetaReader.getTableRegions(this.catalogTracker, Bytes.toBytes(tableName)); + LOG.debug("Actual no of regioninfos found in tabledir is " + regionInfos.size()); + // Remove those which are already assigned + if (this.newRegions != null) { + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + } + LOG.debug("Regions to be processed is " + regionInfos.size()); + try { + assignmentManager.getRegionStates().createRegionStates(regionInfos); + assignmentManager.assign(regionInfos); + } catch (InterruptedException e) { + LOG.error("Caught " + e + " during round-robin assignment"); + InterruptedIOException ie = new InterruptedIOException(e.getMessage()); + ie.initCause(e); + throw ie; + } + } catch (IOException e) { + LOG.error("Error while collecting the regions for the table " + tableName + + " from META.", e); + } + // zkTable.setStatusAssignUserRegions(tableName); + setEnabledState(tableName); + break; + default: + throw new IllegalArgumentException("Invalid state from table status node for the table " + + tableName); + } + } catch 
(KeeperException e) { + LOG.error("Unable to get data from the znode ", e); + } + } + + private void cleanUpFailedCreation() { + releaseTableLock(); + this.assignmentManager.getZKTable().removeCreatingTable( + this.tableName); + } + @Override public String toString() { String name = "UnknownServerName"; @@ -137,27 +304,41 @@ name = server.getServerName().toString(); } return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + - this.hTableDescriptor.getNameAsString(); + this.tableName; } @Override public void process() { - String tableName = this.hTableDescriptor.getNameAsString(); - LOG.info("Attempting to create the table " + tableName); + if (!this.isRecovery) { + String tableName = this.tableName; + LOG.info("Attempting to create the table " + tableName); - try { - MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); - if (cpHost != null) { - cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + try { + MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); + if (cpHost != null) { + cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + handleCreateTable(tableName); + completed(null); + if (cpHost != null) { + cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + } catch (Throwable e) { + LOG.error("Error trying to create the table " + tableName, e); + completed(e); } - handleCreateTable(tableName); - completed(null); - if (cpHost != null) { - cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } else { + // This is called when the backup master comes alive and sees a table + // that was partially created + Throwable t = null; + try { + LOG.info("Found table "+tableName +" in partially created state"); + handlePartiallyCreatedTable(tableName); + } catch (Throwable e) { + t = e; + } finally { + completed(t); } - } catch (Throwable e) { - LOG.error("Error trying to create the table " + tableName, e); - completed(e); } } 
@@ -166,14 +347,23 @@ * @param exception null if process() is successful or not null if something has failed. */ protected void completed(final Throwable exception) { + // TODO: clean up the table status node also here + String tableName = this.tableName; + if (exception == null) { + try { + this.assignmentManager.getZKTable().cleanUpTableStatusNode(tableName); + } catch (KeeperException e) { + LOG.error("Could not delete the table status znode for the table " + tableName); + } + } releaseTableLock(); if (exception != null) { // Try deleting the enabling node in case of error // If this does not happen then if the client tries to create the table // again with the same Active master // It will block the creation saying TableAlreadyExists. - this.assignmentManager.getZKTable().removeEnablingTable( - this.hTableDescriptor.getNameAsString()); + this.assignmentManager.getZKTable().removeCreatingTable( + tableName); } } @@ -194,26 +384,79 @@ private void handleCreateTable(String tableName) throws IOException, KeeperException { Path tempdir = fileSystemManager.getTempDir(); FileSystem fs = fileSystemManager.getFileSystem(); + // 1. Set the status in the zookeeper as CREATED_TD + ZKTable zkTable = this.assignmentManager.getZKTable(); + setCreatedTD(tableName, zkTable); - // 1. Create Table Descriptor + // 2. Create Table Descriptor FSTableDescriptors.createTableDescriptor(fs, tempdir, this.hTableDescriptor); + + Path tempTableDir = new Path(tempdir, tableName); Path tableDir = new Path(fileSystemManager.getRootDir(), tableName); + + // 3. Set the status in the zookeeper as CREATED_REGIONINFO + setCreatedRegionInfo(tableName, zkTable); - // 2. Create Regions + // 4. Create Regions List regionInfos = handleCreateHdfsRegions(tempdir, tableName); - // 3. Move Table temp directory to the hbase root location + // 5. Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + setMoveToOrigLocation(tableName, zkTable); + + // 6. 
Move Table temp directory to the hbase root location if (!fs.rename(tempTableDir, tableDir)) { throw new IOException("Unable to move table from temp=" + tempTableDir + " to hbase root=" + tableDir); } + + // 7. Add to meta and assign the regions + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + // 9. Set table enabled flag up in zk. + setEnabledState(tableName); + } + void setMoveToOrigLocation(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException { + zkTable.setStatusMovedToOriginalLocation(tableName); + } + + void setCreatedRegionInfo(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException { + zkTable.setStatusCreatedRegionInfo(tableName); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + System.out.println("Should get the correct state "+currentTableCreationState); + } + + void setCreatedTD(String tableName, ZKTable zkTable) throws NoNodeException, + KeeperException, IOException { + zkTable.setStatusCreatedTableDescriptor(tableName); + State currentTableCreationState = zkTable.getCurrentTableCreationState(tableName); + System.out.println("Should get the correct state "+currentTableCreationState); + } + + private void setEnabledState(String tableName) throws IOException { + try { + assignmentManager.getZKTable().setEnabledTable(tableName); + } catch (KeeperException e) { + throw new IOException("Unable to ensure that " + tableName + " will be" + + " enabled because of a ZooKeeper issue", e); + } + } + + private void addToMetaAndBulkAssign(String tableName, ZKTable zkTable, + List regionInfos) throws IOException, NoNodeException, KeeperException, + InterruptedIOException { + // 8. Set the status in the zookeeper as ADD_TO_META + setAddedToMeta(tableName, zkTable); + // Note that the regionInfo cannot be 0. The HMaster creation of HRegionInfo[] takes care of it. if (regionInfos != null && regionInfos.size() > 0) { - // 4. 
Add regions to META + // Add regions to META MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos); + - // 5. Trigger immediate assignment of the regions in round-robin fashion + // Trigger immediate assignment of the regions in round-robin fashion try { assignmentManager.getRegionStates().createRegionStates(regionInfos); assignmentManager.assign(regionInfos); @@ -224,14 +467,11 @@ throw ie; } } + } - // 6. Set table enabled flag up in zk. - try { - assignmentManager.getZKTable().setEnabledTable(tableName); - } catch (KeeperException e) { - throw new IOException("Unable to ensure that " + tableName + " will be" + - " enabled because of a ZooKeeper issue", e); - } + void setAddedToMeta(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException { + zkTable.setStatusToAddedToMeta(tableName); } private void releaseTableLock() { @@ -254,6 +494,6 @@ final String tableName) throws IOException { return ModifyRegionUtils.createRegions(conf, tableRootDir, - hTableDescriptor, newRegions, null); + hTableDescriptor, newRegions, null, true); } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java (revision 0) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java (working copy) @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. 
You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FSDataInputStream; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +import org.apache.hadoop.hbase.util.Bytes; + +public class RegionInfoExtractor { + + private final Path path; + private final FileSystem fs; + private final String tableName; + private static final Log LOG = LogFactory.getLog(RegionInfoExtractor.class); + private final PathFilter regionDirFilter = new PathFilter() { + + @Override + public boolean accept(Path path) { + if (path.getName().endsWith(".tmp")) { + return false; + } + return true; + } + }; + + public RegionInfoExtractor(Path path, FileSystem fs, String tableName) { + this.path = path; + this.fs = fs; + this.tableName = tableName; + } + + public List collectRegionInfos(boolean useTempDir) throws IOException { + // get the splitKeys file. 
If this file is found we will be able to check if all the regioninfos + // were created before the master went down + FileStatus[] listStatus = fs.listStatus(this.path); + TreeSet hris = new TreeSet(new RegionInfoComparatorBasedOnStartKey()); + int noOfSplitKeys = -1; + for (FileStatus regionDir : listStatus) { + if (useTempDir && !regionDir.isDir() + && regionDir.getPath().getName().equals(HConstants.SPLIT_KEYS_FILE)) { + FSDataInputStream splitKeyFile = null; + try { + splitKeyFile = fs.open(regionDir.getPath()); + noOfSplitKeys = splitKeyFile.readInt(); + } finally { + if (splitKeyFile != null) { + splitKeyFile.close(); + } + } + } + if (regionDir.isDir() && !regionDir.getPath().getName().endsWith(".tmp")) { + HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir.getPath()); + LOG.debug("The hri found in the regioninfo file in the path " + regionDir.getPath() + + " is " + hri.getRegionNameAsString()); + hris.add(hri); + } + } + // When we are trying to get the regioninfos from the region dir, then it means that + // the regioninfos were successfully created. 
+ if (useTempDir) { + if (noOfSplitKeys != hris.size()) { + return new LinkedList(); + } + } + List hriList = new LinkedList(); + hriList.addAll(hris); + return hriList; + } + + private static class RegionInfoComparatorBasedOnStartKey implements Comparator { + @Override + public int compare(HRegionInfo l, HRegionInfo r) { + int compareTo = Bytes.compareTo(l.getStartKey(), r.getStartKey()); + if (compareTo < 0) + return -1; + if (compareTo > 0) + return 1; + return 0; + } + } + +} Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -726,6 +726,9 @@ status.setStatus("Initializing ZK system trackers"); initializeZKBasedSystemTrackers(); + Set creatingTable = this.assignmentManager.getCreatingTable(); + // Deletes tmp directory only if there are no tables in partially created state + this.fileSystemManager.checkTempDir(creatingTable); if (!masterRecovery) { // initialize master side coprocessors before we start handling requests @@ -1537,7 +1540,7 @@ this.executorService.submit(new CreateTableHandler(this, this.fileSystemManager, hTableDescriptor, conf, - newRegions, this).prepare()); + newRegions, this, false, hTableDescriptor.getNameAsString()).prepare()); if (cpHost != null) { cpHost.postCreateTable(hTableDescriptor, newRegions); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java (working copy) @@ -148,7 +148,7 @@ checkRootDir(this.rootdir, conf, this.fs); // check if temp directory exists and clean it 
- checkTempDir(this.tempdir, conf, this.fs); + //checkTempDir(this.tempdir, conf, this.fs); Path oldLogDir = new Path(this.rootdir, HConstants.HREGION_OLDLOGDIR_NAME); @@ -452,25 +452,28 @@ /** * Make sure the hbase temp directory exists and is empty. * NOTE that this method is only executed once just after the master becomes the active one. + * @param creatingTable */ - private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs) + private void checkTempDir(final Path tmpdir, final Configuration c, final FileSystem fs, Set creatingTable) throws IOException { // If the temp directory exists, clear the content (left over, from the previous run) if (fs.exists(tmpdir)) { // Archive table in temp, maybe left over from failed deletion, // if not the cleaner will take care of them. for (Path tabledir: FSUtils.getTableDirs(fs, tmpdir)) { - for (Path regiondir: FSUtils.getRegionDirs(fs, tabledir)) { - HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + if (!creatingTable.contains(tabledir.getName())) { + for (Path regiondir : FSUtils.getRegionDirs(fs, tabledir)) { + HFileArchiver.archiveRegion(fs, this.rootdir, tabledir, regiondir); + } } } - if (!fs.delete(tmpdir, true)) { + if (creatingTable.size() == 0 && !fs.delete(tmpdir, true)) { throw new IOException("Unable to clean the temp directory: " + tmpdir); } } // Create the temp directory - if (!fs.mkdirs(tmpdir)) { + if (creatingTable.size() == 0 && !fs.mkdirs(tmpdir)) { throw new IOException("HBase temp directory '" + tmpdir + "' creation failure."); } } @@ -648,4 +651,9 @@ this.services.getTableDescriptors().add(htd); return htd; } + + // Deletes the temp directory if there are not tables in partially created state + public void checkTempDir(Set creatingTable) throws IOException { + checkTempDir(this.tempdir, conf, this.fs, creatingTable); + } } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java 
=================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java (working copy) @@ -253,6 +253,10 @@ newServerName = null; } + if (state == State.FAILED_CLOSE || state == State.FAILED_OPEN) { + LOG.warn("Failed to transition " + hri + " on " + serverName + ": " + state); + } + String regionName = hri.getEncodedName(); RegionState regionState = new RegionState( hri, state, System.currentTimeMillis(), newServerName); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (working copy) @@ -67,7 +67,7 @@ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { super(masterServices, masterServices.getMasterFileSystem(), hTableDescriptor, - masterServices.getConfiguration(), null, masterServices); + masterServices.getConfiguration(), null, masterServices, false, hTableDescriptor.getNameAsString()); // Snapshot information this.snapshot = snapshot; Index: hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/snapshot/RestoreSnapshotHelper.java (working copy) @@ -405,7 +405,7 @@ public void fillRegion(final HRegion region) throws IOException { cloneRegion(region, 
snapshotRegions.get(region.getRegionInfo().getEncodedName())); } - }); + }, false); return clonedRegionsInfo; } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (revision 1461042) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/util/ModifyRegionUtils.java (working copy) @@ -38,8 +38,10 @@ import org.apache.hadoop.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FSDataOutputStream; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.backup.HFileArchiver; @@ -75,7 +77,7 @@ */ public static List createRegions(final Configuration conf, final Path rootDir, final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions) throws IOException { - return createRegions(conf, rootDir, hTableDescriptor, newRegions, null); + return createRegions(conf, rootDir, hTableDescriptor, newRegions, null, false); } /** @@ -91,7 +93,7 @@ */ public static List createRegions(final Configuration conf, final Path rootDir, final HTableDescriptor hTableDescriptor, final HRegionInfo[] newRegions, - final RegionFillTask task) throws IOException { + final RegionFillTask task, boolean isTableCreation) throws IOException { if (newRegions == null) return null; int regionNumber = newRegions.length; ThreadPoolExecutor regionOpenAndInitThreadPool = getRegionOpenAndInitThreadPool(conf, @@ -99,6 +101,15 @@ CompletionService completionService = new ExecutorCompletionService( regionOpenAndInitThreadPool); List regionInfos = new ArrayList(); + // Create a file that has the number of splitkeys that is given by the user. 
+ // Do this only during table creation + // On failure check for this file first. If this file is found and the number of regioninfos does not match + // and it is in the tmp location we cannot recreate the table. + // But if the region infos matches then we can recreate the table even if it is in the tmp folder + if (isTableCreation) { + Path tableDir = HTableDescriptor.getTableDir(rootDir, hTableDescriptor.getName()); + writeSplitKeysCount(conf, tableDir, newRegions); + } for (final HRegionInfo newRegion : newRegions) { completionService.submit(new Callable() { public HRegionInfo call() throws IOException { @@ -137,6 +148,21 @@ return regionInfos; } + private static void writeSplitKeysCount(final Configuration conf, final Path rootDir, + final HRegionInfo[] newRegions) throws IOException { + FileSystem fs = FileSystem.get(conf); + FSDataOutputStream splitKeyFile = null; + try { + splitKeyFile = fs.create(new Path(rootDir.toString() + Path.SEPARATOR + + HConstants.SPLIT_KEYS_FILE)); + splitKeyFile.writeInt(newRegions.length); + } finally { + if (splitKeyFile != null) { + splitKeyFile.close(); + } + } + } + /* * used by createRegions() to get the thread pool executor based on the * "hbase.hregion.open.and.init.threads.max" property. 
Index: hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java =================================================================== --- hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java (revision 1461042) +++ hbase-server/src/test/java/org/apache/hadoop/hbase/master/handler/TestCreateTableHandler.java (working copy) @@ -18,6 +18,7 @@ */ package org.apache.hadoop.hbase.master.handler; +import static org.junit.Assert.assertFalse; import static org.junit.Assert.assertTrue; import java.io.IOException; @@ -41,6 +42,8 @@ import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.hadoop.hbase.zookeeper.ZKUtil; +import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; import org.junit.AfterClass; import org.junit.BeforeClass; import org.junit.Test; @@ -53,10 +56,17 @@ private static final byte[] TABLENAME = Bytes.toBytes("TestCreateTableHandler"); private static final byte[] FAMILYNAME = Bytes.toBytes("fam"); public static boolean throwException = false; + + public static boolean creating_td = false; + + public static boolean created_regioninfo = false; + public static boolean moved_to_orig = false; + public static boolean added_to_meta = false; + @BeforeClass public static void setUp() throws Exception { - TEST_UTIL.startMiniCluster(1); + TEST_UTIL.startMiniCluster(2, 1); } @AfterClass @@ -90,12 +100,125 @@ assertTrue(TEST_UTIL.getHBaseAdmin().isTableEnabled(TABLENAME)); } + + @Test + public void testMasterRestartOnCreateTableAfterCreatingTD() throws Exception { + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo( + desc.getName(), null, null) }; + 
CustomCreateTableHandler handler = new CustomCreateTableHandler(m, + m.getMasterFileSystem(), desc, cluster.getConfiguration(), + hRegionInfos, m); + creating_td = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) { + Thread.sleep(200); + } + } + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)); + } + + @Test + public void testMasterRestartOnCreateTableAfterCreatingRegionInfo() throws Exception { + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo( + desc.getName(), null, null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, + m.getMasterFileSystem(), desc, cluster.getConfiguration(), + hRegionInfos, m); + created_regioninfo = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) { + Thread.sleep(200000); + } + } + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)); + } + + @Test + public void testMasterRestartOnCreateTableAfterCreatingAddedToMeta() throws Exception { + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] 
hRegionInfos = new HRegionInfo[] { new HRegionInfo( + desc.getName(), null, null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, + m.getMasterFileSystem(), desc, cluster.getConfiguration(), + hRegionInfos, m); + added_to_meta = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) { + Thread.sleep(200); + } + } + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)); + } + + @Test + public void testMasterRestartAfterMovedToOrigLocation() throws Exception { + final MiniHBaseCluster cluster = TEST_UTIL.getHBaseCluster(); + final HMaster m = cluster.getMaster(); + final HTableDescriptor desc = new HTableDescriptor(TABLENAME); + desc.addFamily(new HColumnDescriptor(FAMILYNAME)); + final HRegionInfo[] hRegionInfos = new HRegionInfo[] { new HRegionInfo( + desc.getName(), null, null) }; + CustomCreateTableHandler handler = new CustomCreateTableHandler(m, + m.getMasterFileSystem(), desc, cluster.getConfiguration(), + hRegionInfos, m); + moved_to_orig = true; + handler = (CustomCreateTableHandler) handler.prepare(); + handler.process(); + + m.stop("Abort for master"); + cluster.startMaster(); + LOG.info("Waiting for master to become active."); + cluster.waitForActiveAndReadyMaster(); + + for (int i = 0; i < 100; i++) { + if (!TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)) { + Thread.sleep(200); + } + } + assertFalse(TEST_UTIL.getHBaseAdmin().isTableAvailable(TABLENAME)); + } + private static class CustomCreateTableHandler extends CreateTableHandler { public CustomCreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo[] newRegions, MasterServices masterServices) { - 
super(server, fileSystemManager, hTableDescriptor, conf, newRegions, masterServices); + super(server, fileSystemManager, hTableDescriptor, conf, newRegions, masterServices, false, + hTableDescriptor.getNameAsString()); } @Override @@ -106,5 +229,45 @@ } return super.handleCreateHdfsRegions(tableRootDir, tableName); } + + @Override + void setCreatedTD(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException{ + if(creating_td){ + throw new IOException("Failure on Set creating table descriptor"); + } else{ + super.setCreatedTD(tableName, zkTable); + } + } + + @Override + void setMoveToOrigLocation(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException { + if (moved_to_orig) { + throw new IOException("Failure on Set Moving to original location"); + } else { + super.setMoveToOrigLocation(tableName, zkTable); + } + } + + @Override + void setCreatedRegionInfo(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException { + if (created_regioninfo) { + throw new IOException("Failure on Set Creating region info"); + } else { + super.setCreatedRegionInfo(tableName, zkTable); + } + } + + @Override + void setAddedToMeta(String tableName, ZKTable zkTable) + throws NoNodeException, KeeperException, IOException { + if (added_to_meta) { + throw new IOException("Failure on adding to meta"); + } else { + super.setAddedToMeta(tableName, zkTable); + } + } } }