Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (revision 1459735) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZKTable.java (working copy) @@ -21,10 +21,16 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.classification.InterfaceAudience; +import org.apache.hadoop.hbase.exceptions.DeserializationException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import com.google.protobuf.InvalidProtocolBufferException; + import java.util.HashMap; import java.util.HashSet; import java.util.List; @@ -58,6 +64,7 @@ */ private final Map cache = new HashMap(); + private byte[] data; // TODO: Make it so always a table znode. Put table schema here as well as table state. // Have watcher on table znode so all are notified of state or schema change. 
@@ -134,7 +141,95 @@ setTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } } + + public void createTableCreationStatusNode(final String tableName) throws KeeperException{ + String tableStatusZNode = pathOfTableStatusZNode(tableName); + if (ZKUtil.checkExists(this.watcher, tableStatusZNode) == -1) { + ZKUtil.createAndFailSilent(this.watcher, tableStatusZNode); + } + } + + public void cleanUpTableStatusNode(final String tableName) throws KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + ZKUtil.deleteNodeFailSilent(this.watcher, tableStatusZNode); + } + + + /** + * If the table is found in CREATING_TABLE state the inmemory state is + * removed. This helps in cases where CreateTable is to be retried by the + * client incase of failures + * + * @param tableName + */ + public void removeCreatingTable(final String tableName) { + synchronized (this.cache) { + if (isCreatingTable(tableName)) { + this.cache.remove(tableName); + } + } + } + + public static Set getCreatingTables(ZooKeeperWatcher zkw) throws KeeperException { + return getAllTables(zkw, ZooKeeperProtos.Table.State.CREATING); + } + + + public TableCreationStatus.State getCurrentTableCreationState(String tableName) throws KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + byte[] data = ZKUtil.getData(this.watcher, tableStatusZNode); + try { + ProtobufUtil.expectPBMagicPrefix(data); + ZooKeeperProtos.TableCreationStatus.Builder builder = ZooKeeperProtos.TableCreationStatus.newBuilder(); + int magicLen = ProtobufUtil.lengthOfPBMagic(); + ZooKeeperProtos.TableCreationStatus t = builder.mergeFrom(data, magicLen, data.length - magicLen).build(); + return t.getState(); + } catch (InvalidProtocolBufferException e) { + KeeperException ke = new KeeperException.DataInconsistencyException(); + ke.initCause(e); + throw ke; + } catch (DeserializationException e) { + throw ZKUtil.convert(e); + } + } + + public void setStatusCreatedTableDescriptor(final 
String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.CREATED_TD); + } + + public void setStatusCreatedRegionInfo(final String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.CREATED_REGIONINFO); + } + + public void setStatusMovedToOriginalLocation(final String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.MOVED_TO_ORIG_LOCATION); + } + + public void setStatusToAddedToMeta(final String tableName) throws NoNodeException, + KeeperException { + setStatusOfTableCreationNode(tableName, TableCreationStatus.State.ADDED_TO_META); + } + + private void setStatusOfTableCreationNode(final String tableName, final TableCreationStatus.State state) + throws NoNodeException, KeeperException { + String tableStatusZNode = pathOfTableStatusZNode(tableName); + ZooKeeperProtos.TableCreationStatus.Builder builder = ZooKeeperProtos.TableCreationStatus + .newBuilder(); + builder.setState(state); + byte[] data = ProtobufUtil.prependPBMagic(builder.build().toByteArray()); + ZKUtil.setData(this.watcher, tableStatusZNode, data); + } + + private String pathOfTableStatusZNode(final String tableName) { + String tableZNode = ZKUtil.joinZNode(this.watcher.tableZNode, tableName); + String tableStatusZNode = ZKUtil.joinZNode(tableZNode, tableName); + return tableStatusZNode; + } + /** * Sets the specified table as ENABLING in zookeeper atomically * If the table is already in ENABLING state, no operation is performed * @param tableName * @return if the operation succeeds or not * @throws KeeperException unexpected zookeeper exception */ @@ -152,6 +247,24 @@ return true; } } + + /** + * Sets the specified table as CREATING in zookeeper atomically + * If the table is already in CREATING state, no operation is performed + * @param tableName + * @return if the operation succeeds or not + * @throws KeeperException unexpected zookeeper exception + */ + public boolean checkAndSetCreatingTable(final 
String tableName) + throws KeeperException { + synchronized (this.cache) { + if (isCreatingTable(tableName)) { + return false; + } + setTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + return true; + } + } /** * Sets the specified table as ENABLING in zookeeper atomically @@ -215,6 +328,10 @@ public boolean isEnablingTable(final String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLING); } + + public boolean isCreatingTable(final String tableName) { + return isTableState(tableName, ZooKeeperProtos.Table.State.CREATING); + } public boolean isEnabledTable(String tableName) { return isTableState(tableName, ZooKeeperProtos.Table.State.ENABLED); @@ -374,6 +491,7 @@ ZooKeeperProtos.Table.State state = ZKTableReadOnly.getTableState(zkw, child); for (ZooKeeperProtos.Table.State expectedState: states) { if (state == expectedState) { + LOG.debug("The table "+child+" found in "+state+" state"); allTables.add(child); break; } @@ -381,4 +499,5 @@ } return allTables; } + } Index: hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java =================================================================== --- hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (revision 1459735) +++ hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/ZooKeeperWatcher.java (working copy) @@ -105,6 +105,8 @@ public String balancerZNode; // znode containing the lock for the tables public String tableLockZNode; + // znode that has the current state of the create table + public String tableCreationStatusNode; // Certain ZooKeeper nodes need to be world-readable public static final ArrayList CREATOR_ALL_AND_WORLD_READABLE = @@ -220,6 +222,9 @@ conf.get("zookeeper.znode.balancer", "balancer")); tableLockZNode = ZKUtil.joinZNode(baseZNode, conf.get("zookeeper.znode.tableLock", "table-lock")); + + tableCreationStatusNode = ZKUtil.joinZNode(tableZNode, + conf.get("zookeeper.znode.tableStatus", 
"table-status")); } /** Index: hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java =================================================================== --- hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (revision 1459735) +++ hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/ZooKeeperProtos.java (working copy) @@ -2812,12 +2812,14 @@ DISABLED(1, 1), DISABLING(2, 2), ENABLING(3, 3), + CREATING(4, 4), ; public static final int ENABLED_VALUE = 0; public static final int DISABLED_VALUE = 1; public static final int DISABLING_VALUE = 2; public static final int ENABLING_VALUE = 3; + public static final int CREATING_VALUE = 4; public final int getNumber() { return value; } @@ -2828,6 +2830,7 @@ case 1: return DISABLED; case 2: return DISABLING; case 3: return ENABLING; + case 4: return CREATING; default: return null; } } @@ -2858,7 +2861,7 @@ } private static final State[] VALUES = { - ENABLED, DISABLED, DISABLING, ENABLING, + ENABLED, DISABLED, DISABLING, ENABLING, CREATING, }; public static State valueOf( @@ -3234,6 +3237,469 @@ // @@protoc_insertion_point(class_scope:Table) } + public interface TableCreationStatusOrBuilder + extends com.google.protobuf.MessageOrBuilder { + + // required .TableCreationStatus.State state = 1 [default = CREATED_TD]; + boolean hasState(); + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State getState(); + } + public static final class TableCreationStatus extends + com.google.protobuf.GeneratedMessage + implements TableCreationStatusOrBuilder { + // Use TableCreationStatus.newBuilder() to construct. 
+ private TableCreationStatus(Builder builder) { + super(builder); + } + private TableCreationStatus(boolean noInit) {} + + private static final TableCreationStatus defaultInstance; + public static TableCreationStatus getDefaultInstance() { + return defaultInstance; + } + + public TableCreationStatus getDefaultInstanceForType() { + return defaultInstance; + } + + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_fieldAccessorTable; + } + + public enum State + implements com.google.protobuf.ProtocolMessageEnum { + CREATED_TD(0, 0), + CREATED_REGIONINFO(1, 1), + MOVED_TO_ORIG_LOCATION(2, 2), + ADDED_TO_META(3, 3), + ; + + public static final int CREATED_TD_VALUE = 0; + public static final int CREATED_REGIONINFO_VALUE = 1; + public static final int MOVED_TO_ORIG_LOCATION_VALUE = 2; + public static final int ADDED_TO_META_VALUE = 3; + + + public final int getNumber() { return value; } + + public static State valueOf(int value) { + switch (value) { + case 0: return CREATED_TD; + case 1: return CREATED_REGIONINFO; + case 2: return MOVED_TO_ORIG_LOCATION; + case 3: return ADDED_TO_META; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public State findValueByNumber(int number) { + return State.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final 
com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDescriptor().getEnumTypes().get(0); + } + + private static final State[] VALUES = { + CREATED_TD, CREATED_REGIONINFO, MOVED_TO_ORIG_LOCATION, ADDED_TO_META, + }; + + public static State valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private State(int index, int value) { + this.index = index; + this.value = value; + } + + // @@protoc_insertion_point(enum_scope:TableCreationStatus.State) + } + + private int bitField0_; + // required .TableCreationStatus.State state = 1 [default = CREATED_TD]; + public static final int STATE_FIELD_NUMBER = 1; + private org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State state_; + public boolean hasState() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State getState() { + return state_; + } + + private void initFields() { + state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasState()) { + memoizedIsInitialized = 0; + return false; + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if 
(((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeEnum(1, state_.getNumber()); + } + getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeEnumSize(1, state_.getNumber()); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus other = (org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus) obj; + + boolean result = true; + result = result && (hasState() == other.hasState()); + if (hasState()) { + result = result && + (getState() == other.getState()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + @java.lang.Override + public int hashCode() { + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasState()) { + hash = (37 * hash) + STATE_FIELD_NUMBER; + hash = (53 * hash) + hashEnum(getState()); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return 
newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return newBuilder().mergeFrom(data, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom(java.io.InputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite 
extensionRegistry) + throws java.io.IOException { + Builder builder = newBuilder(); + if (builder.mergeDelimitedFrom(input, extensionRegistry)) { + return builder.buildParsed(); + } else { + return null; + } + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return newBuilder().mergeFrom(input).buildParsed(); + } + public static org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return newBuilder().mergeFrom(input, extensionRegistry) + .buildParsed(); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus prototype) { + return newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatusOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.internal_static_TableCreationStatus_fieldAccessorTable; + } + + // Construct using 
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder(BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDescriptor(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus build() { + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException(result); + } + return result; + } + + private org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus buildParsed() + throws com.google.protobuf.InvalidProtocolBufferException { + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus result = buildPartial(); + if (!result.isInitialized()) { + throw newUninitializedMessageException( + result).asInvalidProtocolBufferException(); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus 
buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus result = new org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + result.state_ = state_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.getDefaultInstance()) return this; + if (other.hasState()) { + setState(other.getState()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasState()) { + + return false; + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder( + this.getUnknownFields()); + while (true) { + int tag = input.readTag(); + switch (tag) { + case 0: + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + this.setUnknownFields(unknownFields.build()); + onChanged(); + return this; + } + break; + } + case 8: { + int rawValue = input.readEnum(); + 
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State value = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.valueOf(rawValue); + if (value == null) { + unknownFields.mergeVarintField(1, rawValue); + } else { + bitField0_ |= 0x00000001; + state_ = value; + } + break; + } + } + } + } + + private int bitField0_; + + // required .TableCreationStatus.State state = 1 [default = CREATED_TD]; + private org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + public boolean hasState() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + public org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State getState() { + return state_; + } + public Builder setState(org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State value) { + if (value == null) { + throw new NullPointerException(); + } + bitField0_ |= 0x00000001; + state_ = value; + onChanged(); + return this; + } + public Builder clearState() { + bitField0_ = (bitField0_ & ~0x00000001); + state_ = org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.State.CREATED_TD; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:TableCreationStatus) + } + + static { + defaultInstance = new TableCreationStatus(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:TableCreationStatus) + } + public interface ReplicationPeerOrBuilder extends com.google.protobuf.MessageOrBuilder { @@ -5706,6 +6172,11 @@ com.google.protobuf.GeneratedMessage.FieldAccessorTable internal_static_Table_fieldAccessorTable; private static com.google.protobuf.Descriptors.Descriptor + internal_static_TableCreationStatus_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + 
internal_static_TableCreationStatus_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor internal_static_ReplicationPeer_descriptor; private static com.google.protobuf.GeneratedMessage.FieldAccessorTable @@ -5750,19 +6221,24 @@ "plitLogTask.State\022\037\n\nserverName\030\002 \002(\0132\013." + "ServerName\"C\n\005State\022\016\n\nUNASSIGNED\020\000\022\t\n\005O", "WNED\020\001\022\014\n\010RESIGNED\020\002\022\010\n\004DONE\020\003\022\007\n\003ERR\020\004\"" + - "n\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007E" + - "NABLED\"?\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED" + - "\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\"%\n\017Repli" + - "cationPeer\022\022\n\nclusterkey\030\001 \002(\t\"^\n\020Replic" + - "ationState\022&\n\005state\030\001 \002(\0162\027.ReplicationS" + - "tate.State\"\"\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISA" + - "BLED\020\001\"+\n\027ReplicationHLogPosition\022\020\n\010pos" + - "ition\030\001 \002(\003\"$\n\017ReplicationLock\022\021\n\tlockOw" + - "ner\030\001 \002(\t\"s\n\tTableLock\022\021\n\ttableName\030\001 \001(", - "\014\022\036\n\tlockOwner\030\002 \001(\0132\013.ServerName\022\020\n\010thr" + - "eadId\030\003 \001(\003\022\020\n\010isShared\030\004 \001(\010\022\017\n\007purpose" + - "\030\005 \001(\tBE\n*org.apache.hadoop.hbase.protob" + - "uf.generatedB\017ZooKeeperProtosH\001\210\001\001\240\001\001" + "|\n\005Table\022$\n\005state\030\001 \002(\0162\014.Table.State:\007E" + + "NABLED\"M\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED" + + "\020\001\022\r\n\tDISABLING\020\002\022\014\n\010ENABLING\020\003\022\014\n\010CREAT" + + "ING\020\004\"\254\001\n\023TableCreationStatus\0225\n\005state\030\001" + + " \002(\0162\032.TableCreationStatus.State:\nCREATE" + + "D_TD\"^\n\005State\022\016\n\nCREATED_TD\020\000\022\026\n\022CREATED" + + "_REGIONINFO\020\001\022\032\n\026MOVED_TO_ORIG_LOCATION\020" + + 
"\002\022\021\n\rADDED_TO_META\020\003\"%\n\017ReplicationPeer\022" + + "\022\n\nclusterkey\030\001 \002(\t\"^\n\020ReplicationState\022", + "&\n\005state\030\001 \002(\0162\027.ReplicationState.State\"" + + "\"\n\005State\022\013\n\007ENABLED\020\000\022\014\n\010DISABLED\020\001\"+\n\027R" + + "eplicationHLogPosition\022\020\n\010position\030\001 \002(\003" + + "\"$\n\017ReplicationLock\022\021\n\tlockOwner\030\001 \002(\t\"s" + + "\n\tTableLock\022\021\n\ttableName\030\001 \001(\014\022\036\n\tlockOw" + + "ner\030\002 \001(\0132\013.ServerName\022\020\n\010threadId\030\003 \001(\003" + + "\022\020\n\010isShared\030\004 \001(\010\022\017\n\007purpose\030\005 \001(\tBE\n*o" + + "rg.apache.hadoop.hbase.protobuf.generate" + + "dB\017ZooKeeperProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -5817,8 +6293,16 @@ new java.lang.String[] { "State", }, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.Table.Builder.class); + internal_static_TableCreationStatus_descriptor = + getDescriptor().getMessageTypes().get(6); + internal_static_TableCreationStatus_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_TableCreationStatus_descriptor, + new java.lang.String[] { "State", }, + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.class, + org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus.Builder.class); internal_static_ReplicationPeer_descriptor = - getDescriptor().getMessageTypes().get(6); + getDescriptor().getMessageTypes().get(7); internal_static_ReplicationPeer_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationPeer_descriptor, @@ -5826,7 +6310,7 @@ 
org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationPeer.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationPeer.Builder.class); internal_static_ReplicationState_descriptor = - getDescriptor().getMessageTypes().get(7); + getDescriptor().getMessageTypes().get(8); internal_static_ReplicationState_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationState_descriptor, @@ -5834,7 +6318,7 @@ org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationState.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationState.Builder.class); internal_static_ReplicationHLogPosition_descriptor = - getDescriptor().getMessageTypes().get(8); + getDescriptor().getMessageTypes().get(9); internal_static_ReplicationHLogPosition_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationHLogPosition_descriptor, @@ -5842,7 +6326,7 @@ org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationHLogPosition.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationHLogPosition.Builder.class); internal_static_ReplicationLock_descriptor = - getDescriptor().getMessageTypes().get(9); + getDescriptor().getMessageTypes().get(10); internal_static_ReplicationLock_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_ReplicationLock_descriptor, @@ -5850,7 +6334,7 @@ org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationLock.class, org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.ReplicationLock.Builder.class); internal_static_TableLock_descriptor = - getDescriptor().getMessageTypes().get(10); + getDescriptor().getMessageTypes().get(11); internal_static_TableLock_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_TableLock_descriptor, Index: 
hbase-protocol/src/main/protobuf/ZooKeeper.proto =================================================================== --- hbase-protocol/src/main/protobuf/ZooKeeper.proto (revision 1459735) +++ hbase-protocol/src/main/protobuf/ZooKeeper.proto (working copy) @@ -93,12 +93,26 @@ DISABLED = 1; DISABLING = 2; ENABLING = 3; + CREATING = 4; } // This is the table's state. If no znode for a table, // its state is presumed enabled. See o.a.h.h.zookeeper.ZKTable class // for more. required State state = 1 [default = ENABLED]; } +message TableCreationStatus { + // Table's current state + enum State { + CREATED_TD = 0; + CREATED_REGIONINFO = 1; + MOVED_TO_ORIG_LOCATION = 2; + ADDED_TO_META = 3; + } + // This is the table creation progress. If no status znode exists for a + // table, creation is presumed not to have started. See + // o.a.h.h.zookeeper.ZKTable class for more. + required State state = 1 [default = CREATED_TD]; +} /** * Used by replication. Holds a replication peer key. Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (revision 1459735) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java (working copy) @@ -27,6 +27,7 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import java.util.Map.Entry; import java.util.NavigableMap; import java.util.Set; import java.util.TreeMap; @@ -53,7 +54,9 @@ import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.Stoppable; +import org.apache.hadoop.hbase.exceptions.NotAllMetaRegionsOnlineException; import org.apache.hadoop.hbase.exceptions.ServerNotRunningYetException; +import org.apache.hadoop.hbase.exceptions.TableExistsException; import org.apache.hadoop.hbase.exceptions.TableNotFoundException; import org.apache.hadoop.hbase.catalog.CatalogTracker; import 
org.apache.hadoop.hbase.catalog.MetaReader; @@ -62,6 +65,7 @@ import org.apache.hadoop.hbase.executor.EventType; import org.apache.hadoop.hbase.executor.ExecutorService; import org.apache.hadoop.hbase.master.handler.ClosedRegionHandler; +import org.apache.hadoop.hbase.master.handler.CreateTableHandler; import org.apache.hadoop.hbase.master.handler.DisableTableHandler; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; import org.apache.hadoop.hbase.master.handler.OpenedRegionHandler; @@ -368,7 +372,9 @@ // Scan META to build list of existing regions, servers, and assignment // Returns servers who have not checked in (assumed dead) and their regions - Map> deadServers = rebuildUserRegions(); + Pair>, Map>> recoveryInfo + = rebuildUserRegions(); + Map> deadServers = recoveryInfo.getFirst(); // This method will assign all user regions if a clean server startup or // it will reconstruct master state and cleanup any leftovers from @@ -377,6 +383,8 @@ recoverTableInDisablingState(); recoverTableInEnablingState(); + + recoverTableInCreation(recoveryInfo.getSecond()); } /** @@ -2359,7 +2367,9 @@ * in META * @throws IOException */ - Map> rebuildUserRegions() throws IOException, KeeperException { + Pair>, Map>> rebuildUserRegions() + throws IOException, KeeperException { + Set creatingTables = ZKTable.getCreatingTables(watcher); Set enablingTables = ZKTable.getEnablingTables(watcher); Set disabledOrEnablingTables = ZKTable.getDisabledTables(watcher); disabledOrEnablingTables.addAll(enablingTables); @@ -2370,9 +2380,12 @@ List results = MetaReader.fullScan(this.catalogTracker); // Get any new but slow to checkin region server that joined the cluster Set onlineServers = serverManager.getOnlineServers().keySet(); + Pair>, Map>> pair = + new Pair>, Map>>(); // Map of offline servers and their regions to be returned Map> offlineServers = new TreeMap>(); + Map> hriOfCreatingTables = new HashMap>(); // Iterate regions in META for (Result result : results) { Pair 
region = HRegionInfo.getHRegionInfoAndServerName(result); @@ -2398,6 +2411,14 @@ " has null regionLocation." + " But its table " + tableName + " isn't in ENABLING state."); } + // If the table is in CREATING_TABLE state and if the regionLocation is null + // it means they are not yet assigned while table creation + + // Collect the hris for the regions of the table that is under creation + if (creatingTables.contains(tableName)) { + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, true); + } + } else if (!onlineServers.contains(regionLocation)) { // Region is located on a server that isn't online List offlineRegions = offlineServers.get(regionLocation); @@ -2406,11 +2427,14 @@ offlineServers.put(regionLocation, offlineRegions); } offlineRegions.add(regionInfo); + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, false); // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && !getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!creatingTables.contains(tableName)) { + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } else { // If region is in offline and split state check the ZKNode @@ -2428,20 +2452,43 @@ } // Region is being served and on an active server // add only if region not in disabled or enabling table + // Even if in CreatingTable state we can make the region as online + // Ensure we take care while recovery if (!disabledOrEnablingTables.contains(tableName)) { regionStates.regionOnline(regionInfo, regionLocation); + populateCreatingTableHRIs(hriOfCreatingTables, regionInfo, tableName, false); } // need to enable the table if not disabled or disabling or enabling // this will be used in rolling restarts - if (!disabledOrDisablingOrEnabling.contains(tableName) - && 
!getZKTable().isEnabledTable(tableName)) { - setEnabledTable(tableName); + if (!creatingTables.contains(tableName)) { + if (!disabledOrDisablingOrEnabling.contains(tableName) + && !getZKTable().isEnabledTable(tableName)) { + setEnabledTable(tableName); + } } } } - return offlineServers; + pair.setFirst(offlineServers); + pair.setSecond(hriOfCreatingTables); + return pair; } + private void populateCreatingTableHRIs( + Map> hriOfCreatingTables, HRegionInfo regionInfo, + String tableName, boolean process) { + if (hriOfCreatingTables.get(tableName) == null) { + Map infoDetails = new HashMap(); + infoDetails.put(regionInfo, process); + LOG.debug("The region " + regionInfo + " is part of partially created table" + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + hriOfCreatingTables.put(tableName, infoDetails); + } else { + LOG.debug("The region " + regionInfo + " is part of partially created table" + + ((process) ? (" that is not assigned to any RS") : ("that is already assigned"))); + hriOfCreatingTables.get(tableName).put(regionInfo, process); + } + } + /** * Recover the tables that were not fully moved to DISABLED state. These * tables are in DISABLING state when the master restarted/switched. 
@@ -2489,6 +2536,35 @@ } } } + + private void recoverTableInCreation(Map> hriOfCreatingTable) + throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { + if (hriOfCreatingTable != null) { + Iterator>> iterator = hriOfCreatingTable.entrySet() + .iterator(); + while (iterator.hasNext()) { + // Recover by calling CreateTableHandler + Entry> details = iterator.next(); + String tableName = details.getKey(); + Map hriDetails = details.getValue(); + Iterator> hriIterator = hriDetails.entrySet().iterator(); + HRegionInfo[] newRegions = new HRegionInfo[hriDetails.size()]; + int count = 0; + while (hriIterator.hasNext()) { + Entry hri = hriIterator.next(); + if (hri.getValue() == true) { + newRegions[count] = hri.getKey(); + count++; + } + } + LOG.info("The table " + tableName + + " is in CREATINGTABLE state. Hence trying to create the table"); + // Note that the table descriptor is passed as null + new CreateTableHandler(this.server, ((HMaster) this.server).getMasterFileSystem(), null, + server.getConfiguration(), newRegions, (MasterServices) this.server, true, tableName).prepare().process(); + } + } + } /** * Processes list of dead servers from result of META scan and regions in RIT Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (revision 1459735) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/CreateTableHandler.java (working copy) @@ -20,6 +20,8 @@ import java.io.IOException; import java.io.InterruptedIOException; +import java.util.Arrays; +import java.util.Collections; import java.util.List; import org.apache.commons.logging.Log; @@ -45,10 +47,16 @@ import org.apache.hadoop.hbase.master.MasterServices; import org.apache.hadoop.hbase.master.TableLockManager; import 
org.apache.hadoop.hbase.master.TableLockManager.TableLock; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.TableCreationStatus; +import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.ModifyRegionUtils; +import org.apache.hadoop.hbase.zookeeper.ZKTable; import org.apache.zookeeper.KeeperException; +import org.apache.zookeeper.KeeperException.NoNodeException; +import com.google.common.collect.Lists; + /** * Handler to create a table. */ @@ -63,21 +71,28 @@ private final TableLockManager tableLockManager; private final HRegionInfo [] newRegions; private final TableLock tableLock; + private final boolean isRecovery; + private final String tableName; public CreateTableHandler(Server server, MasterFileSystem fileSystemManager, HTableDescriptor hTableDescriptor, Configuration conf, HRegionInfo [] newRegions, - MasterServices masterServices) { + MasterServices masterServices, boolean isRecovery, String tableName) { super(server, EventType.C_M_CREATE_TABLE); this.fileSystemManager = fileSystemManager; + // This can be null when the constructor is called from the recovery flow. this.hTableDescriptor = hTableDescriptor; this.conf = conf; this.newRegions = newRegions; this.catalogTracker = masterServices.getCatalogTracker(); this.assignmentManager = masterServices.getAssignmentManager(); this.tableLockManager = masterServices.getTableLockManager(); + this.isRecovery = isRecovery; + this.tableName = (this.hTableDescriptor == null) ? tableName : this.hTableDescriptor + .getNameAsString(); - this.tableLock = this.tableLockManager.writeLock(this.hTableDescriptor.getName() + byte[] name = ((this.hTableDescriptor == null)? 
Bytes.toBytes(tableName):this.hTableDescriptor.getName()); + this.tableLock = this.tableLockManager.writeLock(name , EventType.C_M_CREATE_TABLE.toString()); } @@ -100,7 +115,7 @@ this.tableLock.acquire(); boolean success = false; try { - String tableName = this.hTableDescriptor.getNameAsString(); + String tableName = this.tableName; if (MetaReader.tableExists(catalogTracker, tableName)) { throw new TableExistsException(tableName); } @@ -109,17 +124,31 @@ // same time, given the async nature of the operation, the table // could be in a state where .META. table hasn't been updated yet in // the process() function. - // Use enabling state to tell if there is already a request for the same + // Use CREATINGTABLE state to tell if there is already a request for the same // table in progress. This will introduce a new zookeeper call. Given // createTable isn't a frequent operation, that should be ok. //TODO: now that we have table locks, re-evaluate above try { - if (!this.assignmentManager.getZKTable().checkAndSetEnablingTable(tableName)) { + if (!this.assignmentManager.getZKTable().checkAndSetCreatingTable(tableName)) { throw new TableExistsException(tableName); } + // Create the status znode under the /hbase/table/tablename znode + // If this creation fails we can throw IOException. If the master + // aborts just after this + // then we can just clean up the znode on master restart. 
Client will + // anyways recreate the table + // as isTableAvailable() will always be false + + if (!this.isRecovery) { + this.assignmentManager.getZKTable().createTableCreationStatusNode(tableName); + } else { + LOG.debug("The table " + + tableName + + " is partially created and the new master is trying to complete the table creation."); + } } catch (KeeperException e) { - throw new IOException("Unable to ensure that the table will be" + - " enabling because of a ZooKeeper issue", e); + throw new IOException("Unable to ensure that the table will be" + + " enabling because of a ZooKeeper issue", e); } success = true; } finally { @@ -130,6 +159,115 @@ return this; } + private void handlePartiallyCreatedTable(String tableName) throws IOException { + ZKTable zkTable = this.assignmentManager.getZKTable(); + try { + TableCreationStatus.State currentTableCreationState = zkTable + .getCurrentTableCreationState(tableName); + Path tempDir = null; + FileSystem fs = null; + if (currentTableCreationState == null) { + LOG.error("The table " + tableName + " could not be recovered. Try recreating the table."); + return; + } + RegionInfoExtractor infoExtractor = null; + List regionInfos = null; + List currentHRegions = null; + switch (currentTableCreationState) { + case CREATED_TD: + // Just clear the table znode and tableStatus znode. Upto the client to recreate the table + // I think handling this case would be difficult without knowing the split keys + LOG.error("The table " + tableName + " could not be recovered. Try recreating the table."); + break; + case CREATED_REGIONINFO: + // Try forming the region infos from the tmp directory. 
Try creating the table once again + tempDir = this.fileSystemManager.getTempDir(); + fs = this.fileSystemManager.getFileSystem(); + + Path tempTableDir = new Path(tempDir, tableName); + infoExtractor = new RegionInfoExtractor(tempTableDir, fs, tableName); + regionInfos = infoExtractor.collectRegionInfos(); + LOG.debug("Actual no of regioninfos found in tabledir is "+regionInfos.size()); + // Remove those which are already assigned + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + LOG.debug("Regions to be processed is "+regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + // 8. Set the status in the zookeeper as ASSIGN_USER_REGIONS + // zkTable.setStatusAssignUserRegions(tableName); + + setEnabledState(tableName); + + + break; + case MOVED_TO_ORIG_LOCATION: + Path rootDir = this.fileSystemManager.getRootDir(); + fs = this.fileSystemManager.getFileSystem(); + try { + // Try forming the region infos from the table directory. Try + // creating the table once again + Path tableDir = HTableDescriptor.getTableDir(rootDir, Bytes.toBytes(tableName)); + infoExtractor = new RegionInfoExtractor(tableDir, fs, tableName); + regionInfos = infoExtractor.collectRegionInfos(); + LOG.debug("Actual no of regioninfos found in tabledir is "+regionInfos.size()); + // Remove those which are already assigned + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + LOG.debug("Regions to be processed is "+regionInfos.size()); + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + // 8. 
Set the status in the zookeeper as ASSIGN_USER_REGIONS + //zkTable.setStatusAssignUserRegions(tableName); + + setEnabledState(tableName); + + } catch (IOException ioe) { + LOG.error("Cannot obtain the tabledescriptor from the tempdir for the table " + + tableName); + //cleanUpFailedCreation(); + } + break; + case ADDED_TO_META : + // Do the assignment of all the regions added to META + try { + regionInfos = MetaReader.getTableRegions(this.catalogTracker, Bytes.toBytes(tableName)); + LOG.debug("Actual no of regioninfos found in tabledir is "+regionInfos.size()); + // Remove those which are already assigned + currentHRegions = Arrays.asList(this.newRegions); + regionInfos.retainAll(currentHRegions); + LOG.debug("Regions to be processed is "+regionInfos.size()); + // 7. Trigger immediate assignment of the regions in round-robin + // fashion + try { + assignmentManager.getRegionStates().createRegionStates(regionInfos); + assignmentManager.assign(regionInfos); + } catch (InterruptedException e) { + LOG.error("Caught " + e + " during round-robin assignment"); + InterruptedIOException ie = new InterruptedIOException(e.getMessage()); + ie.initCause(e); + throw ie; + } + } catch (IOException e) { + LOG.error("Error while collecting the regions for the table "+tableName+" from META.", e); + } + //zkTable.setStatusAssignUserRegions(tableName); + setEnabledState(tableName); + break; + default: + throw new IllegalArgumentException("Invalid state from table status node for the table "+ tableName); + } + } catch (KeeperException e) { + LOG.error("Unable to get data from the znode ", e); + } + } + + private void cleanUpFailedCreation() { + releaseTableLock(); + this.assignmentManager.getZKTable().removeCreatingTable( + this.tableName); + } + @Override public String toString() { String name = "UnknownServerName"; @@ -137,27 +275,41 @@ name = server.getServerName().toString(); } return getClass().getSimpleName() + "-" + name + "-" + getSeqid() + "-" + - 
this.hTableDescriptor.getNameAsString(); + this.tableName; } @Override public void process() { - String tableName = this.hTableDescriptor.getNameAsString(); - LOG.info("Attempting to create the table " + tableName); + if (!this.isRecovery) { + String tableName = this.tableName; + LOG.info("Attempting to create the table " + tableName); - try { - MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); - if (cpHost != null) { - cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + try { + MasterCoprocessorHost cpHost = ((HMaster) this.server).getCoprocessorHost(); + if (cpHost != null) { + cpHost.preCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + handleCreateTable(tableName); + completed(null); + if (cpHost != null) { + cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } + } catch (Throwable e) { + LOG.error("Error trying to create the table " + tableName, e); + completed(e); } - handleCreateTable(tableName); - completed(null); - if (cpHost != null) { - cpHost.postCreateTableHandler(this.hTableDescriptor, this.newRegions); + } else { + // This is called when the backup master comes alive and sees a table + // that was partially created + Throwable t = null; + try { + LOG.info("Found table "+tableName +" in partially created state"); + handlePartiallyCreatedTable(tableName); + } catch (Throwable e) { + t = e; + } finally { + completed(t); } - } catch (Throwable e) { - LOG.error("Error trying to create the table " + tableName, e); - completed(e); } } @@ -166,14 +318,21 @@ * @param exception null if process() is successful or not null if something has failed. 
*/ protected void completed(final Throwable exception) { + // TODO: clean up the table status node also here + String tableName = this.tableName; + try { + this.assignmentManager.getZKTable().cleanUpTableStatusNode(tableName); + } catch (KeeperException e) { + LOG.error("Could not delete the table status znode for the table "+tableName); + } releaseTableLock(); if (exception != null) { // Try deleting the enabling node in case of error // If this does not happen then if the client tries to create the table // again with the same Active master // It will block the creation saying TableAlreadyExists. - this.assignmentManager.getZKTable().removeEnablingTable( - this.hTableDescriptor.getNameAsString()); + this.assignmentManager.getZKTable().removeCreatingTable( + tableName); } } @@ -197,23 +356,61 @@ // 1. Create Table Descriptor FSTableDescriptors.createTableDescriptor(fs, tempdir, this.hTableDescriptor); + + // 2. Set the status in the zookeeper as CREATED_TD + ZKTable zkTable = this.assignmentManager.getZKTable(); + zkTable.setStatusCreatedTableDescriptor(tableName); + Path tempTableDir = new Path(tempdir, tableName); Path tableDir = new Path(fileSystemManager.getRootDir(), tableName); - // 2. Create Regions + // 3. Create Regions List regionInfos = handleCreateHdfsRegions(tempdir, tableName); + + // 4. Set the status in the zookeeper as CREATED_REGIONINFO + zkTable.setStatusCreatedRegionInfo(tableName); - // 3. Move Table temp directory to the hbase root location + // 5. Move Table temp directory to the hbase root location if (!fs.rename(tempTableDir, tableDir)) { throw new IOException("Unable to move table from temp=" + tempTableDir + " to hbase root=" + tableDir); } + + // 6. Set the status in the zookeeper as MOVED_TO_ORIG_LOCATION + zkTable.setStatusMovedToOriginalLocation(tableName); + + // 7. Add to meta and assign the regions + addToMetaAndBulkAssign(tableName, zkTable, regionInfos); + + // 8. 
Set the status in the zookeeper as ASSIGN_USER_REGIONS + // This state may not be needed + //zkTable.setStatusAssignUserRegions(tableName); + + // 9. Set table enabled flag up in zk. + setEnabledState(tableName); + } + private void setEnabledState(String tableName) throws IOException { + try { + assignmentManager.getZKTable().setEnabledTable(tableName); + } catch (KeeperException e) { + throw new IOException("Unable to ensure that " + tableName + " will be" + + " enabled because of a ZooKeeper issue", e); + } + } + + private void addToMetaAndBulkAssign(String tableName, ZKTable zkTable, + List regionInfos) throws IOException, NoNodeException, KeeperException, + InterruptedIOException { + // Note that the regionInfo cannot be 0. The HMaster creation of HRegionInfo[] takes care of it. if (regionInfos != null && regionInfos.size() > 0) { - // 4. Add regions to META + // Add regions to META MetaEditor.addRegionsToMeta(this.catalogTracker, regionInfos); + + // Set the status in the zookeeper as ADD_TO_META + zkTable.setStatusToAddedToMeta(tableName); - // 5. Trigger immediate assignment of the regions in round-robin fashion + // Trigger immediate assignment of the regions in round-robin fashion try { assignmentManager.getRegionStates().createRegionStates(regionInfos); assignmentManager.assign(regionInfos); Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java (revision 0) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/RegionInfoExtractor.java (working copy) @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.handler; + +import java.io.IOException; +import java.util.Comparator; +import java.util.LinkedList; +import java.util.List; +import java.util.TreeSet; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.PathFilter; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.regionserver.HRegionFileSystem; +import org.apache.hadoop.hbase.util.Bytes; + +public class RegionInfoExtractor { + + private final Path path; + private final FileSystem fs; + private final String tableName; + private static final Log LOG = LogFactory.getLog(RegionInfoExtractor.class); + private final PathFilter regionInfoFilter = new PathFilter() { + + @Override + public boolean accept(Path path) { + if (!path.getName().endsWith(".regioninfo")) { + return false; + } + return true; + } + }; + + public RegionInfoExtractor(Path path, FileSystem fs, String tableName) { + this.path = path; + this.fs = fs; + this.tableName = tableName; + } + + public List collectRegionInfos() throws IOException { + TreeSet hris = new TreeSet(new RegionInfoComparatorBasedOnStartKey()); + FileStatus[] listStatus = fs.listStatus(this.path); + for 
(FileStatus regionDir : listStatus) { + if (regionDir.isDir()) { + HRegionInfo hri = HRegionFileSystem.loadRegionInfoFileContent(fs, regionDir.getPath()); + LOG.debug("The hri found in the regioninfo file in the path " + regionDir.getPath() + + " is " + hri.getRegionNameAsString()); + hris.add(hri); + } + } + + List hriList= new LinkedList(); + hriList.addAll(hris); + return hriList; + } + + private static class RegionInfoComparatorBasedOnStartKey implements Comparator { + @Override + public int compare(HRegionInfo l, HRegionInfo r) { + int compareTo = Bytes.compareTo(l.getStartKey(), r.getStartKey()); + if (compareTo < 0) + return -1; + if (compareTo > 0) + return 1; + return 0; + } + } + +} Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (revision 1459735) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java (working copy) @@ -1482,7 +1482,7 @@ this.executorService.submit(new CreateTableHandler(this, this.fileSystemManager, hTableDescriptor, conf, - newRegions, this).prepare()); + newRegions, this, false, hTableDescriptor.getNameAsString()).prepare()); if (cpHost != null) { cpHost.postCreateTable(hTableDescriptor, newRegions); } Index: hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java =================================================================== --- hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (revision 1459735) +++ hbase-server/src/main/java/org/apache/hadoop/hbase/master/snapshot/CloneSnapshotHandler.java (working copy) @@ -67,7 +67,7 @@ final SnapshotDescription snapshot, final HTableDescriptor hTableDescriptor) throws NotAllMetaRegionsOnlineException, TableExistsException, IOException { super(masterServices, masterServices.getMasterFileSystem(), hTableDescriptor, - 
masterServices.getConfiguration(), null, masterServices); + masterServices.getConfiguration(), null, masterServices, false, hTableDescriptor.getNameAsString()); // Snapshot information this.snapshot = snapshot;