diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
index 3c7b2ce..175515c 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/HRegionInfo.java
@@ -988,7 +988,7 @@ public class HRegionInfo implements Comparable {
   }
 
   /**
-   * Convert a HRegionInfo to a RegionInfo
+   * Convert a HRegionInfo to the protobuf RegionInfo
    *
    * @return the converted RegionInfo
    */
diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
index 6a30511..0a6ba44 100644
--- a/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
+++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/replication/ReplicationQueuesZKImpl.java
@@ -168,7 +168,7 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R
     SortedMap<String, SortedSet<String>> newQueues = new TreeMap<String, SortedSet<String>>();
     // check whether there is multi support. If yes, use it.
     if (conf.getBoolean(HConstants.ZOOKEEPER_USEMULTI, true)) {
-      LOG.info("Atomically moving " + regionserverZnode + "'s wals to my queue");
+      LOG.info("Atomically moving " + regionserverZnode + "'s WALs to my queue");
       newQueues = copyQueuesFromRSUsingMulti(regionserverZnode);
     } else {
       LOG.info("Moving " + regionserverZnode + "'s wals to my queue");
@@ -336,9 +336,9 @@ public class ReplicationQueuesZKImpl extends ReplicationStateZKBase implements R
       }
       // add delete op for dead rs
       listOfOps.add(ZKUtilOp.deleteNodeFailSilent(deadRSZnodePath));
-      LOG.debug(" The multi list size is: " + listOfOps.size());
+      if (LOG.isTraceEnabled()) LOG.trace(" The multi list size is: " + listOfOps.size());
       ZKUtil.multiOrSequential(this.zookeeper, listOfOps, false);
-      LOG.info("Atomically moved the dead regionserver logs. ");
+      if (LOG.isTraceEnabled()) LOG.trace("Atomically moved the dead regionserver logs. ");
     } catch (KeeperException e) {
       // Multi call failed; it looks like some other regionserver took away the logs.
LOG.warn("Got exception in copyQueuesFromRSUsingMulti: ", e); diff --git a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java index 54b6ef4..0975c14 100644 --- a/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java +++ b/hbase-client/src/main/java/org/apache/hadoop/hbase/zookeeper/MetaTableLocator.java @@ -17,21 +17,28 @@ */ package org.apache.hadoop.hbase.zookeeper; -import com.google.common.base.Stopwatch; -import com.google.protobuf.InvalidProtocolBufferException; +import java.io.EOFException; +import java.io.IOException; +import java.net.ConnectException; +import java.net.NoRouteToHostException; +import java.net.SocketException; +import java.net.SocketTimeoutException; +import java.rmi.UnknownHostException; +import java.util.ArrayList; +import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; import org.apache.hadoop.hbase.NotAllMetaRegionsOnlineException; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.RegionReplicaUtil; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.exceptions.DeserializationException; -import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.ipc.FailedServerException; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.master.RegionState; @@ -47,18 +54,8 @@ import org.apache.hadoop.hbase.util.Pair; import org.apache.hadoop.ipc.RemoteException; import org.apache.zookeeper.KeeperException; -import java.io.EOFException; -import java.io.IOException; -import java.net.ConnectException; -import java.net.NoRouteToHostException; -import java.net.SocketException; -import java.net.SocketTimeoutException; -import java.rmi.UnknownHostException; - -import java.util.List; -import java.util.ArrayList; - -import javax.annotation.Nullable; +import com.google.common.base.Stopwatch; +import com.google.protobuf.InvalidProtocolBufferException; /** * Utility class to perform operation (get/wait for/verify/set/delete) on znode in ZooKeeper @@ -620,4 +617,4 @@ public class MetaTableLocator { stopped = true; } } -} +} \ No newline at end of file diff --git a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java index c59071c..ff54ebe 100644 --- a/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java +++ b/hbase-common/src/main/java/org/apache/hadoop/hbase/util/Bytes.java @@ -1186,7 +1186,7 @@ public class Bytes { * @param offset Offset into array at which vint begins. * @throws java.io.IOException e * @return deserialized long from buffer. - * @deprecated Use {@link #readAsVLong()} instead. + * @deprecated Use {@link #readAsVLong(byte[], int)} instead. 
    */
   @Deprecated
   public static long readVLong(final byte [] buffer, final int offset)
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
index 338fcad..5fbd470 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/Procedure.java
@@ -168,6 +168,16 @@ public abstract class Procedure implements Comparable {
     // no-op
   }
 
+  /**
+   * By default, the executor will run procedures start to finish. Return true to make the executor
+   * yield between each flow step to give other procedures time to run their flow steps.
+   * @return Return true if the executor should yield on completion of a flow state step.
+   *         Defaults to return false.
+   */
+  protected boolean isYieldAfterSuccessfulFlowStateStep() {
+    return false;
+  }
+
   @Override
   public String toString() {
     StringBuilder sb = new StringBuilder();
@@ -677,4 +687,4 @@ public abstract class Procedure implements Comparable {
 
     return proc;
   }
-}
\ No newline at end of file
+}
diff --git a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
index 6e87997..6dbe5c6 100644
--- a/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
+++ b/hbase-procedure/src/main/java/org/apache/hadoop/hbase/procedure2/ProcedureExecutor.java
@@ -148,8 +148,8 @@ public class ProcedureExecutor {
 
     public void periodicExecute(final TEnvironment env) {
       if (completed.isEmpty()) {
-        if (LOG.isDebugEnabled()) {
-          LOG.debug("No completed procedures to cleanup.");
+        if (LOG.isTraceEnabled()) {
+          LOG.trace("No completed procedures to cleanup.");
         }
         return;
       }
@@ -1094,4 +1094,4 @@ public class ProcedureExecutor {
     }
     return new ProcedureResult(proc.getStartTime(), proc.getLastUpdate(), proc.getResult());
   }
-}
\ No newline at end of file
+}
diff --git a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/MasterProcedureProtos.java b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/MasterProcedureProtos.java
index e0a4775..0a44199 100644
--- a/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/MasterProcedureProtos.java
+++ b/hbase-protocol/src/main/java/org/apache/hadoop/hbase/protobuf/generated/MasterProcedureProtos.java
@@ -1070,6 +1070,160 @@ public final class MasterProcedureProtos {
     // @@protoc_insertion_point(enum_scope:DisableTableState)
   }
 
+  /**
+   * Protobuf enum {@code ServerCrashState}
+   */
+  public enum ServerCrashState
+      implements com.google.protobuf.ProtocolMessageEnum {
+    /**
+     * SERVER_CRASH_START = 1;
+     */
+    SERVER_CRASH_START(0, 1),
+    /**
+     * SERVER_CRASH_PROCESS_META = 2;
+     */
+    SERVER_CRASH_PROCESS_META(1, 2),
+    /**
+     * SERVER_CRASH_GET_REGIONS = 3;
+     */
+    SERVER_CRASH_GET_REGIONS(2, 3),
+    /**
+     * SERVER_CRASH_NO_SPLIT_LOGS = 4;
+     */
+    SERVER_CRASH_NO_SPLIT_LOGS(3, 4),
+    /**
+     * SERVER_CRASH_SPLIT_LOGS = 5;
+     */
+    SERVER_CRASH_SPLIT_LOGS(4, 5),
+    /**
+     * SERVER_CRASH_PREPARE_LOG_REPLAY = 6;
+     */
+    SERVER_CRASH_PREPARE_LOG_REPLAY(5, 6),
+    /**
+     * SERVER_CRASH_CALC_REGIONS_TO_ASSIGN = 7;
+     */
+    SERVER_CRASH_CALC_REGIONS_TO_ASSIGN(6, 7),
+    /**
+     * SERVER_CRASH_ASSIGN = 8;
+     */
+    SERVER_CRASH_ASSIGN(7, 8),
+    /**
+     * SERVER_CRASH_WAIT_ON_ASSIGN = 9;
+     */
+    SERVER_CRASH_WAIT_ON_ASSIGN(8, 9),
+    /**
+     * SERVER_CRASH_FINISH = 100;
+     */
+
SERVER_CRASH_FINISH(9, 100), + ; + + /** + * SERVER_CRASH_START = 1; + */ + public static final int SERVER_CRASH_START_VALUE = 1; + /** + * SERVER_CRASH_PROCESS_META = 2; + */ + public static final int SERVER_CRASH_PROCESS_META_VALUE = 2; + /** + * SERVER_CRASH_GET_REGIONS = 3; + */ + public static final int SERVER_CRASH_GET_REGIONS_VALUE = 3; + /** + * SERVER_CRASH_NO_SPLIT_LOGS = 4; + */ + public static final int SERVER_CRASH_NO_SPLIT_LOGS_VALUE = 4; + /** + * SERVER_CRASH_SPLIT_LOGS = 5; + */ + public static final int SERVER_CRASH_SPLIT_LOGS_VALUE = 5; + /** + * SERVER_CRASH_PREPARE_LOG_REPLAY = 6; + */ + public static final int SERVER_CRASH_PREPARE_LOG_REPLAY_VALUE = 6; + /** + * SERVER_CRASH_CALC_REGIONS_TO_ASSIGN = 7; + */ + public static final int SERVER_CRASH_CALC_REGIONS_TO_ASSIGN_VALUE = 7; + /** + * SERVER_CRASH_ASSIGN = 8; + */ + public static final int SERVER_CRASH_ASSIGN_VALUE = 8; + /** + * SERVER_CRASH_WAIT_ON_ASSIGN = 9; + */ + public static final int SERVER_CRASH_WAIT_ON_ASSIGN_VALUE = 9; + /** + * SERVER_CRASH_FINISH = 100; + */ + public static final int SERVER_CRASH_FINISH_VALUE = 100; + + + public final int getNumber() { return value; } + + public static ServerCrashState valueOf(int value) { + switch (value) { + case 1: return SERVER_CRASH_START; + case 2: return SERVER_CRASH_PROCESS_META; + case 3: return SERVER_CRASH_GET_REGIONS; + case 4: return SERVER_CRASH_NO_SPLIT_LOGS; + case 5: return SERVER_CRASH_SPLIT_LOGS; + case 6: return SERVER_CRASH_PREPARE_LOG_REPLAY; + case 7: return SERVER_CRASH_CALC_REGIONS_TO_ASSIGN; + case 8: return SERVER_CRASH_ASSIGN; + case 9: return SERVER_CRASH_WAIT_ON_ASSIGN; + case 100: return SERVER_CRASH_FINISH; + default: return null; + } + } + + public static com.google.protobuf.Internal.EnumLiteMap + internalGetValueMap() { + return internalValueMap; + } + private static com.google.protobuf.Internal.EnumLiteMap + internalValueMap = + new com.google.protobuf.Internal.EnumLiteMap() { + public ServerCrashState findValueByNumber(int number) { + return ServerCrashState.valueOf(number); + } + }; + + public final com.google.protobuf.Descriptors.EnumValueDescriptor + getValueDescriptor() { + return getDescriptor().getValues().get(index); + } + public final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptorForType() { + return getDescriptor(); + } + public static final com.google.protobuf.Descriptors.EnumDescriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.getDescriptor().getEnumTypes().get(9); + } + + private static final ServerCrashState[] VALUES = values(); + + public static ServerCrashState valueOf( + com.google.protobuf.Descriptors.EnumValueDescriptor desc) { + if (desc.getType() != getDescriptor()) { + throw new java.lang.IllegalArgumentException( + "EnumValueDescriptor is not for this type."); + } + return VALUES[desc.getIndex()]; + } + + private final int index; + private final int value; + + private ServerCrashState(int index, int value) { + this.index = index; + this.value = value; + } + + // @@protoc_insertion_point(enum_scope:ServerCrashState) + } + public interface CreateTableStateDataOrBuilder extends com.google.protobuf.MessageOrBuilder { @@ -11200,181 +11354,1791 @@ public final class MasterProcedureProtos { // @@protoc_insertion_point(class_scope:DisableTableStateData) } - private static com.google.protobuf.Descriptors.Descriptor - internal_static_CreateTableStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - 
internal_static_CreateTableStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_ModifyTableStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_ModifyTableStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_TruncateTableStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_TruncateTableStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_DeleteTableStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_DeleteTableStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_AddColumnFamilyStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_AddColumnFamilyStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_ModifyColumnFamilyStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_ModifyColumnFamilyStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_DeleteColumnFamilyStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_DeleteColumnFamilyStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_EnableTableStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_EnableTableStateData_fieldAccessorTable; - private static com.google.protobuf.Descriptors.Descriptor - internal_static_DisableTableStateData_descriptor; - private static - com.google.protobuf.GeneratedMessage.FieldAccessorTable - internal_static_DisableTableStateData_fieldAccessorTable; + public interface ServerCrashStateDataOrBuilder + extends com.google.protobuf.MessageOrBuilder { - public static com.google.protobuf.Descriptors.FileDescriptor - getDescriptor() { - return descriptor; + // required .ServerName server_name = 1; + /** + * required .ServerName server_name = 1; + */ + boolean hasServerName(); + /** + * required .ServerName server_name = 1; + */ + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServerName(); + /** + * required .ServerName server_name = 1; + */ + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerNameOrBuilder(); + + // optional bool distributed_log_replay = 2; + /** + * optional bool distributed_log_replay = 2; + */ + boolean hasDistributedLogReplay(); + /** + * optional bool distributed_log_replay = 2; + */ + boolean getDistributedLogReplay(); + + // repeated .RegionInfo regions_on_crashed_server = 3; + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + java.util.List + getRegionsOnCrashedServerList(); + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo getRegionsOnCrashedServer(int index); + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + int getRegionsOnCrashedServerCount(); + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + java.util.List + getRegionsOnCrashedServerOrBuilderList(); + /** + * repeated .RegionInfo 
regions_on_crashed_server = 3; + */ + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder getRegionsOnCrashedServerOrBuilder( + int index); + + // repeated .RegionInfo regions_to_assign = 4; + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + java.util.List + getRegionsToAssignList(); + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo getRegionsToAssign(int index); + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + int getRegionsToAssignCount(); + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + java.util.List + getRegionsToAssignOrBuilderList(); + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder getRegionsToAssignOrBuilder( + int index); + + // optional bool carrying_meta = 5; + /** + * optional bool carrying_meta = 5; + */ + boolean hasCarryingMeta(); + /** + * optional bool carrying_meta = 5; + */ + boolean getCarryingMeta(); + + // optional bool should_split_wal = 6 [default = true]; + /** + * optional bool should_split_wal = 6 [default = true]; + */ + boolean hasShouldSplitWal(); + /** + * optional bool should_split_wal = 6 [default = true]; + */ + boolean getShouldSplitWal(); } - private static com.google.protobuf.Descriptors.FileDescriptor - descriptor; - static { - java.lang.String[] descriptorData = { - "\n\025MasterProcedure.proto\032\013HBase.proto\032\tRP" + - "C.proto\"\201\001\n\024CreateTableStateData\022#\n\tuser" + - "_info\030\001 \002(\0132\020.UserInformation\022\"\n\014table_s" + - "chema\030\002 \002(\0132\014.TableSchema\022 \n\013region_info" + - "\030\003 \003(\0132\013.RegionInfo\"\277\001\n\024ModifyTableState" + - "Data\022#\n\tuser_info\030\001 \002(\0132\020.UserInformatio" + - "n\022-\n\027unmodified_table_schema\030\002 \001(\0132\014.Tab" + - "leSchema\022+\n\025modified_table_schema\030\003 \002(\0132" + - "\014.TableSchema\022&\n\036delete_column_family_in" + - "_modify\030\004 \002(\010\"\274\001\n\026TruncateTableStateData", - "\022#\n\tuser_info\030\001 \002(\0132\020.UserInformation\022\027\n" + - "\017preserve_splits\030\002 \002(\010\022\036\n\ntable_name\030\003 \001" + - "(\0132\n.TableName\022\"\n\014table_schema\030\004 \001(\0132\014.T" + - "ableSchema\022 \n\013region_info\030\005 \003(\0132\013.Region" + - "Info\"}\n\024DeleteTableStateData\022#\n\tuser_inf" + - "o\030\001 \002(\0132\020.UserInformation\022\036\n\ntable_name\030" + - "\002 \002(\0132\n.TableName\022 \n\013region_info\030\003 \003(\0132\013" + - ".RegionInfo\"\300\001\n\030AddColumnFamilyStateData" + - "\022#\n\tuser_info\030\001 \002(\0132\020.UserInformation\022\036\n" + - "\ntable_name\030\002 \002(\0132\n.TableName\0220\n\023columnf", - "amily_schema\030\003 \002(\0132\023.ColumnFamilySchema\022" + - "-\n\027unmodified_table_schema\030\004 \001(\0132\014.Table" + - "Schema\"\303\001\n\033ModifyColumnFamilyStateData\022#" + - "\n\tuser_info\030\001 \002(\0132\020.UserInformation\022\036\n\nt" + - "able_name\030\002 \002(\0132\n.TableName\0220\n\023columnfam" + - "ily_schema\030\003 \002(\0132\023.ColumnFamilySchema\022-\n" + - "\027unmodified_table_schema\030\004 \001(\0132\014.TableSc" + - "hema\"\254\001\n\033DeleteColumnFamilyStateData\022#\n\t" + - "user_info\030\001 \002(\0132\020.UserInformation\022\036\n\ntab" + - "le_name\030\002 \002(\0132\n.TableName\022\031\n\021columnfamil", - "y_name\030\003 \002(\014\022-\n\027unmodified_table_schema\030" + - "\004 
\001(\0132\014.TableSchema\"{\n\024EnableTableStateD" + - "ata\022#\n\tuser_info\030\001 \002(\0132\020.UserInformation" + - "\022\036\n\ntable_name\030\002 \002(\0132\n.TableName\022\036\n\026skip" + - "_table_state_check\030\003 \002(\010\"|\n\025DisableTable" + - "StateData\022#\n\tuser_info\030\001 \002(\0132\020.UserInfor" + - "mation\022\036\n\ntable_name\030\002 \002(\0132\n.TableName\022\036" + - "\n\026skip_table_state_check\030\003 \002(\010*\330\001\n\020Creat" + - "eTableState\022\036\n\032CREATE_TABLE_PRE_OPERATIO" + - "N\020\001\022 \n\034CREATE_TABLE_WRITE_FS_LAYOUT\020\002\022\034\n", - "\030CREATE_TABLE_ADD_TO_META\020\003\022\037\n\033CREATE_TA" + - "BLE_ASSIGN_REGIONS\020\004\022\"\n\036CREATE_TABLE_UPD" + - "ATE_DESC_CACHE\020\005\022\037\n\033CREATE_TABLE_POST_OP" + - "ERATION\020\006*\207\002\n\020ModifyTableState\022\030\n\024MODIFY" + - "_TABLE_PREPARE\020\001\022\036\n\032MODIFY_TABLE_PRE_OPE" + - "RATION\020\002\022(\n$MODIFY_TABLE_UPDATE_TABLE_DE" + - "SCRIPTOR\020\003\022&\n\"MODIFY_TABLE_REMOVE_REPLIC" + - "A_COLUMN\020\004\022!\n\035MODIFY_TABLE_DELETE_FS_LAY" + - "OUT\020\005\022\037\n\033MODIFY_TABLE_POST_OPERATION\020\006\022#" + - "\n\037MODIFY_TABLE_REOPEN_ALL_REGIONS\020\007*\212\002\n\022", - "TruncateTableState\022 \n\034TRUNCATE_TABLE_PRE" + - "_OPERATION\020\001\022#\n\037TRUNCATE_TABLE_REMOVE_FR" + - "OM_META\020\002\022\"\n\036TRUNCATE_TABLE_CLEAR_FS_LAY" + - "OUT\020\003\022#\n\037TRUNCATE_TABLE_CREATE_FS_LAYOUT" + - "\020\004\022\036\n\032TRUNCATE_TABLE_ADD_TO_META\020\005\022!\n\035TR" + - "UNCATE_TABLE_ASSIGN_REGIONS\020\006\022!\n\035TRUNCAT" + - "E_TABLE_POST_OPERATION\020\007*\337\001\n\020DeleteTable" + - "State\022\036\n\032DELETE_TABLE_PRE_OPERATION\020\001\022!\n" + - "\035DELETE_TABLE_REMOVE_FROM_META\020\002\022 \n\034DELE" + - "TE_TABLE_CLEAR_FS_LAYOUT\020\003\022\"\n\036DELETE_TAB", - "LE_UPDATE_DESC_CACHE\020\004\022!\n\035DELETE_TABLE_U" + - "NASSIGN_REGIONS\020\005\022\037\n\033DELETE_TABLE_POST_O" + - "PERATION\020\006*\331\001\n\024AddColumnFamilyState\022\035\n\031A" + - "DD_COLUMN_FAMILY_PREPARE\020\001\022#\n\037ADD_COLUMN" + - "_FAMILY_PRE_OPERATION\020\002\022-\n)ADD_COLUMN_FA" + - "MILY_UPDATE_TABLE_DESCRIPTOR\020\003\022$\n ADD_CO" + - "LUMN_FAMILY_POST_OPERATION\020\004\022(\n$ADD_COLU" + - "MN_FAMILY_REOPEN_ALL_REGIONS\020\005*\353\001\n\027Modif" + - "yColumnFamilyState\022 \n\034MODIFY_COLUMN_FAMI" + - "LY_PREPARE\020\001\022&\n\"MODIFY_COLUMN_FAMILY_PRE", - "_OPERATION\020\002\0220\n,MODIFY_COLUMN_FAMILY_UPD" + - "ATE_TABLE_DESCRIPTOR\020\003\022\'\n#MODIFY_COLUMN_" + - "FAMILY_POST_OPERATION\020\004\022+\n\'MODIFY_COLUMN" + - "_FAMILY_REOPEN_ALL_REGIONS\020\005*\226\002\n\027DeleteC" + - "olumnFamilyState\022 \n\034DELETE_COLUMN_FAMILY" + - "_PREPARE\020\001\022&\n\"DELETE_COLUMN_FAMILY_PRE_O" + - "PERATION\020\002\0220\n,DELETE_COLUMN_FAMILY_UPDAT" + - "E_TABLE_DESCRIPTOR\020\003\022)\n%DELETE_COLUMN_FA" + - "MILY_DELETE_FS_LAYOUT\020\004\022\'\n#DELETE_COLUMN" + - "_FAMILY_POST_OPERATION\020\005\022+\n\'DELETE_COLUM", - "N_FAMILY_REOPEN_ALL_REGIONS\020\006*\350\001\n\020Enable" + - "TableState\022\030\n\024ENABLE_TABLE_PREPARE\020\001\022\036\n\032" + - "ENABLE_TABLE_PRE_OPERATION\020\002\022)\n%ENABLE_T" + - "ABLE_SET_ENABLING_TABLE_STATE\020\003\022$\n ENABL" + - "E_TABLE_MARK_REGIONS_ONLINE\020\004\022(\n$ENABLE_" + - "TABLE_SET_ENABLED_TABLE_STATE\020\005\022\037\n\033ENABL" + - "E_TABLE_POST_OPERATION\020\006*\362\001\n\021DisableTabl" + - "eState\022\031\n\025DISABLE_TABLE_PREPARE\020\001\022\037\n\033DIS" + 
- "ABLE_TABLE_PRE_OPERATION\020\002\022+\n\'DISABLE_TA" + - "BLE_SET_DISABLING_TABLE_STATE\020\003\022&\n\"DISAB", - "LE_TABLE_MARK_REGIONS_OFFLINE\020\004\022*\n&DISAB" + - "LE_TABLE_SET_DISABLED_TABLE_STATE\020\005\022 \n\034D" + - "ISABLE_TABLE_POST_OPERATION\020\006BK\n*org.apa" + - "che.hadoop.hbase.protobuf.generatedB\025Mas" + - "terProcedureProtosH\001\210\001\001\240\001\001" - }; - com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = - new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { - public com.google.protobuf.ExtensionRegistry assignDescriptors( - com.google.protobuf.Descriptors.FileDescriptor root) { - descriptor = root; - internal_static_CreateTableStateData_descriptor = - getDescriptor().getMessageTypes().get(0); - internal_static_CreateTableStateData_fieldAccessorTable = new - com.google.protobuf.GeneratedMessage.FieldAccessorTable( - internal_static_CreateTableStateData_descriptor, - new java.lang.String[] { "UserInfo", "TableSchema", "RegionInfo", }); - internal_static_ModifyTableStateData_descriptor = - getDescriptor().getMessageTypes().get(1); - internal_static_ModifyTableStateData_fieldAccessorTable = new - com.google.protobuf.GeneratedMessage.FieldAccessorTable( - internal_static_ModifyTableStateData_descriptor, - new java.lang.String[] { "UserInfo", "UnmodifiedTableSchema", "ModifiedTableSchema", "DeleteColumnFamilyInModify", }); - internal_static_TruncateTableStateData_descriptor = - getDescriptor().getMessageTypes().get(2); - internal_static_TruncateTableStateData_fieldAccessorTable = new - com.google.protobuf.GeneratedMessage.FieldAccessorTable( - internal_static_TruncateTableStateData_descriptor, - new java.lang.String[] { "UserInfo", "PreserveSplits", "TableName", "TableSchema", "RegionInfo", }); - internal_static_DeleteTableStateData_descriptor = - getDescriptor().getMessageTypes().get(3); + /** + * Protobuf type {@code ServerCrashStateData} + */ + public static final class ServerCrashStateData extends + com.google.protobuf.GeneratedMessage + implements ServerCrashStateDataOrBuilder { + // Use ServerCrashStateData.newBuilder() to construct. 
+ private ServerCrashStateData(com.google.protobuf.GeneratedMessage.Builder builder) { + super(builder); + this.unknownFields = builder.getUnknownFields(); + } + private ServerCrashStateData(boolean noInit) { this.unknownFields = com.google.protobuf.UnknownFieldSet.getDefaultInstance(); } + + private static final ServerCrashStateData defaultInstance; + public static ServerCrashStateData getDefaultInstance() { + return defaultInstance; + } + + public ServerCrashStateData getDefaultInstanceForType() { + return defaultInstance; + } + + private final com.google.protobuf.UnknownFieldSet unknownFields; + @java.lang.Override + public final com.google.protobuf.UnknownFieldSet + getUnknownFields() { + return this.unknownFields; + } + private ServerCrashStateData( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + initFields(); + int mutable_bitField0_ = 0; + com.google.protobuf.UnknownFieldSet.Builder unknownFields = + com.google.protobuf.UnknownFieldSet.newBuilder(); + try { + boolean done = false; + while (!done) { + int tag = input.readTag(); + switch (tag) { + case 0: + done = true; + break; + default: { + if (!parseUnknownField(input, unknownFields, + extensionRegistry, tag)) { + done = true; + } + break; + } + case 10: { + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder subBuilder = null; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + subBuilder = serverName_.toBuilder(); + } + serverName_ = input.readMessage(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.PARSER, extensionRegistry); + if (subBuilder != null) { + subBuilder.mergeFrom(serverName_); + serverName_ = subBuilder.buildPartial(); + } + bitField0_ |= 0x00000001; + break; + } + case 16: { + bitField0_ |= 0x00000002; + distributedLogReplay_ = input.readBool(); + break; + } + case 26: { + if (!((mutable_bitField0_ & 0x00000004) == 0x00000004)) { + regionsOnCrashedServer_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000004; + } + regionsOnCrashedServer_.add(input.readMessage(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.PARSER, extensionRegistry)); + break; + } + case 34: { + if (!((mutable_bitField0_ & 0x00000008) == 0x00000008)) { + regionsToAssign_ = new java.util.ArrayList(); + mutable_bitField0_ |= 0x00000008; + } + regionsToAssign_.add(input.readMessage(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.PARSER, extensionRegistry)); + break; + } + case 40: { + bitField0_ |= 0x00000004; + carryingMeta_ = input.readBool(); + break; + } + case 48: { + bitField0_ |= 0x00000008; + shouldSplitWal_ = input.readBool(); + break; + } + } + } + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + throw e.setUnfinishedMessage(this); + } catch (java.io.IOException e) { + throw new com.google.protobuf.InvalidProtocolBufferException( + e.getMessage()).setUnfinishedMessage(this); + } finally { + if (((mutable_bitField0_ & 0x00000004) == 0x00000004)) { + regionsOnCrashedServer_ = java.util.Collections.unmodifiableList(regionsOnCrashedServer_); + } + if (((mutable_bitField0_ & 0x00000008) == 0x00000008)) { + regionsToAssign_ = java.util.Collections.unmodifiableList(regionsToAssign_); + } + this.unknownFields = unknownFields.build(); + makeExtensionsImmutable(); + } + } + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return 
org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.internal_static_ServerCrashStateData_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.internal_static_ServerCrashStateData_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.class, org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.Builder.class); + } + + public static com.google.protobuf.Parser PARSER = + new com.google.protobuf.AbstractParser() { + public ServerCrashStateData parsePartialFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return new ServerCrashStateData(input, extensionRegistry); + } + }; + + @java.lang.Override + public com.google.protobuf.Parser getParserForType() { + return PARSER; + } + + private int bitField0_; + // required .ServerName server_name = 1; + public static final int SERVER_NAME_FIELD_NUMBER = 1; + private org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName serverName_; + /** + * required .ServerName server_name = 1; + */ + public boolean hasServerName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required .ServerName server_name = 1; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServerName() { + return serverName_; + } + /** + * required .ServerName server_name = 1; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerNameOrBuilder() { + return serverName_; + } + + // optional bool distributed_log_replay = 2; + public static final int DISTRIBUTED_LOG_REPLAY_FIELD_NUMBER = 2; + private boolean distributedLogReplay_; + /** + * optional bool distributed_log_replay = 2; + */ + public boolean hasDistributedLogReplay() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional bool distributed_log_replay = 2; + */ + public boolean getDistributedLogReplay() { + return distributedLogReplay_; + } + + // repeated .RegionInfo regions_on_crashed_server = 3; + public static final int REGIONS_ON_CRASHED_SERVER_FIELD_NUMBER = 3; + private java.util.List regionsOnCrashedServer_; + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public java.util.List getRegionsOnCrashedServerList() { + return regionsOnCrashedServer_; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public java.util.List + getRegionsOnCrashedServerOrBuilderList() { + return regionsOnCrashedServer_; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public int getRegionsOnCrashedServerCount() { + return regionsOnCrashedServer_.size(); + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo getRegionsOnCrashedServer(int index) { + return regionsOnCrashedServer_.get(index); + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder getRegionsOnCrashedServerOrBuilder( + int index) { + return regionsOnCrashedServer_.get(index); + } + + // repeated .RegionInfo regions_to_assign = 4; + public static final int REGIONS_TO_ASSIGN_FIELD_NUMBER = 4; + private java.util.List 
regionsToAssign_; + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public java.util.List getRegionsToAssignList() { + return regionsToAssign_; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public java.util.List + getRegionsToAssignOrBuilderList() { + return regionsToAssign_; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public int getRegionsToAssignCount() { + return regionsToAssign_.size(); + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo getRegionsToAssign(int index) { + return regionsToAssign_.get(index); + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder getRegionsToAssignOrBuilder( + int index) { + return regionsToAssign_.get(index); + } + + // optional bool carrying_meta = 5; + public static final int CARRYING_META_FIELD_NUMBER = 5; + private boolean carryingMeta_; + /** + * optional bool carrying_meta = 5; + */ + public boolean hasCarryingMeta() { + return ((bitField0_ & 0x00000004) == 0x00000004); + } + /** + * optional bool carrying_meta = 5; + */ + public boolean getCarryingMeta() { + return carryingMeta_; + } + + // optional bool should_split_wal = 6 [default = true]; + public static final int SHOULD_SPLIT_WAL_FIELD_NUMBER = 6; + private boolean shouldSplitWal_; + /** + * optional bool should_split_wal = 6 [default = true]; + */ + public boolean hasShouldSplitWal() { + return ((bitField0_ & 0x00000008) == 0x00000008); + } + /** + * optional bool should_split_wal = 6 [default = true]; + */ + public boolean getShouldSplitWal() { + return shouldSplitWal_; + } + + private void initFields() { + serverName_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance(); + distributedLogReplay_ = false; + regionsOnCrashedServer_ = java.util.Collections.emptyList(); + regionsToAssign_ = java.util.Collections.emptyList(); + carryingMeta_ = false; + shouldSplitWal_ = true; + } + private byte memoizedIsInitialized = -1; + public final boolean isInitialized() { + byte isInitialized = memoizedIsInitialized; + if (isInitialized != -1) return isInitialized == 1; + + if (!hasServerName()) { + memoizedIsInitialized = 0; + return false; + } + if (!getServerName().isInitialized()) { + memoizedIsInitialized = 0; + return false; + } + for (int i = 0; i < getRegionsOnCrashedServerCount(); i++) { + if (!getRegionsOnCrashedServer(i).isInitialized()) { + memoizedIsInitialized = 0; + return false; + } + } + for (int i = 0; i < getRegionsToAssignCount(); i++) { + if (!getRegionsToAssign(i).isInitialized()) { + memoizedIsInitialized = 0; + return false; + } + } + memoizedIsInitialized = 1; + return true; + } + + public void writeTo(com.google.protobuf.CodedOutputStream output) + throws java.io.IOException { + getSerializedSize(); + if (((bitField0_ & 0x00000001) == 0x00000001)) { + output.writeMessage(1, serverName_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + output.writeBool(2, distributedLogReplay_); + } + for (int i = 0; i < regionsOnCrashedServer_.size(); i++) { + output.writeMessage(3, regionsOnCrashedServer_.get(i)); + } + for (int i = 0; i < regionsToAssign_.size(); i++) { + output.writeMessage(4, regionsToAssign_.get(i)); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + output.writeBool(5, carryingMeta_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + output.writeBool(6, shouldSplitWal_); + } + 
getUnknownFields().writeTo(output); + } + + private int memoizedSerializedSize = -1; + public int getSerializedSize() { + int size = memoizedSerializedSize; + if (size != -1) return size; + + size = 0; + if (((bitField0_ & 0x00000001) == 0x00000001)) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(1, serverName_); + } + if (((bitField0_ & 0x00000002) == 0x00000002)) { + size += com.google.protobuf.CodedOutputStream + .computeBoolSize(2, distributedLogReplay_); + } + for (int i = 0; i < regionsOnCrashedServer_.size(); i++) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(3, regionsOnCrashedServer_.get(i)); + } + for (int i = 0; i < regionsToAssign_.size(); i++) { + size += com.google.protobuf.CodedOutputStream + .computeMessageSize(4, regionsToAssign_.get(i)); + } + if (((bitField0_ & 0x00000004) == 0x00000004)) { + size += com.google.protobuf.CodedOutputStream + .computeBoolSize(5, carryingMeta_); + } + if (((bitField0_ & 0x00000008) == 0x00000008)) { + size += com.google.protobuf.CodedOutputStream + .computeBoolSize(6, shouldSplitWal_); + } + size += getUnknownFields().getSerializedSize(); + memoizedSerializedSize = size; + return size; + } + + private static final long serialVersionUID = 0L; + @java.lang.Override + protected java.lang.Object writeReplace() + throws java.io.ObjectStreamException { + return super.writeReplace(); + } + + @java.lang.Override + public boolean equals(final java.lang.Object obj) { + if (obj == this) { + return true; + } + if (!(obj instanceof org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData)) { + return super.equals(obj); + } + org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData other = (org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData) obj; + + boolean result = true; + result = result && (hasServerName() == other.hasServerName()); + if (hasServerName()) { + result = result && getServerName() + .equals(other.getServerName()); + } + result = result && (hasDistributedLogReplay() == other.hasDistributedLogReplay()); + if (hasDistributedLogReplay()) { + result = result && (getDistributedLogReplay() + == other.getDistributedLogReplay()); + } + result = result && getRegionsOnCrashedServerList() + .equals(other.getRegionsOnCrashedServerList()); + result = result && getRegionsToAssignList() + .equals(other.getRegionsToAssignList()); + result = result && (hasCarryingMeta() == other.hasCarryingMeta()); + if (hasCarryingMeta()) { + result = result && (getCarryingMeta() + == other.getCarryingMeta()); + } + result = result && (hasShouldSplitWal() == other.hasShouldSplitWal()); + if (hasShouldSplitWal()) { + result = result && (getShouldSplitWal() + == other.getShouldSplitWal()); + } + result = result && + getUnknownFields().equals(other.getUnknownFields()); + return result; + } + + private int memoizedHashCode = 0; + @java.lang.Override + public int hashCode() { + if (memoizedHashCode != 0) { + return memoizedHashCode; + } + int hash = 41; + hash = (19 * hash) + getDescriptorForType().hashCode(); + if (hasServerName()) { + hash = (37 * hash) + SERVER_NAME_FIELD_NUMBER; + hash = (53 * hash) + getServerName().hashCode(); + } + if (hasDistributedLogReplay()) { + hash = (37 * hash) + DISTRIBUTED_LOG_REPLAY_FIELD_NUMBER; + hash = (53 * hash) + hashBoolean(getDistributedLogReplay()); + } + if (getRegionsOnCrashedServerCount() > 0) { + hash = (37 * hash) + REGIONS_ON_CRASHED_SERVER_FIELD_NUMBER; + hash = (53 * hash) + 
getRegionsOnCrashedServerList().hashCode(); + } + if (getRegionsToAssignCount() > 0) { + hash = (37 * hash) + REGIONS_TO_ASSIGN_FIELD_NUMBER; + hash = (53 * hash) + getRegionsToAssignList().hashCode(); + } + if (hasCarryingMeta()) { + hash = (37 * hash) + CARRYING_META_FIELD_NUMBER; + hash = (53 * hash) + hashBoolean(getCarryingMeta()); + } + if (hasShouldSplitWal()) { + hash = (37 * hash) + SHOULD_SPLIT_WAL_FIELD_NUMBER; + hash = (53 * hash) + hashBoolean(getShouldSplitWal()); + } + hash = (29 * hash) + getUnknownFields().hashCode(); + memoizedHashCode = hash; + return hash; + } + + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom( + com.google.protobuf.ByteString data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom( + com.google.protobuf.ByteString data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom(byte[] data) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom( + byte[] data, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws com.google.protobuf.InvalidProtocolBufferException { + return PARSER.parseFrom(data, extensionRegistry); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseDelimitedFrom(java.io.InputStream input) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseDelimitedFrom( + java.io.InputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseDelimitedFrom(input, extensionRegistry); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom( + com.google.protobuf.CodedInputStream input) + throws java.io.IOException { + return PARSER.parseFrom(input); + } + public static org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parseFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + return PARSER.parseFrom(input, extensionRegistry); + } + + public static Builder newBuilder() { return Builder.create(); } + public Builder newBuilderForType() { return newBuilder(); } + public static Builder newBuilder(org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData prototype) { + return 
newBuilder().mergeFrom(prototype); + } + public Builder toBuilder() { return newBuilder(this); } + + @java.lang.Override + protected Builder newBuilderForType( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + Builder builder = new Builder(parent); + return builder; + } + /** + * Protobuf type {@code ServerCrashStateData} + */ + public static final class Builder extends + com.google.protobuf.GeneratedMessage.Builder + implements org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateDataOrBuilder { + public static final com.google.protobuf.Descriptors.Descriptor + getDescriptor() { + return org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.internal_static_ServerCrashStateData_descriptor; + } + + protected com.google.protobuf.GeneratedMessage.FieldAccessorTable + internalGetFieldAccessorTable() { + return org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.internal_static_ServerCrashStateData_fieldAccessorTable + .ensureFieldAccessorsInitialized( + org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.class, org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.Builder.class); + } + + // Construct using org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.newBuilder() + private Builder() { + maybeForceBuilderInitialization(); + } + + private Builder( + com.google.protobuf.GeneratedMessage.BuilderParent parent) { + super(parent); + maybeForceBuilderInitialization(); + } + private void maybeForceBuilderInitialization() { + if (com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders) { + getServerNameFieldBuilder(); + getRegionsOnCrashedServerFieldBuilder(); + getRegionsToAssignFieldBuilder(); + } + } + private static Builder create() { + return new Builder(); + } + + public Builder clear() { + super.clear(); + if (serverNameBuilder_ == null) { + serverName_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance(); + } else { + serverNameBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000001); + distributedLogReplay_ = false; + bitField0_ = (bitField0_ & ~0x00000002); + if (regionsOnCrashedServerBuilder_ == null) { + regionsOnCrashedServer_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000004); + } else { + regionsOnCrashedServerBuilder_.clear(); + } + if (regionsToAssignBuilder_ == null) { + regionsToAssign_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000008); + } else { + regionsToAssignBuilder_.clear(); + } + carryingMeta_ = false; + bitField0_ = (bitField0_ & ~0x00000010); + shouldSplitWal_ = true; + bitField0_ = (bitField0_ & ~0x00000020); + return this; + } + + public Builder clone() { + return create().mergeFrom(buildPartial()); + } + + public com.google.protobuf.Descriptors.Descriptor + getDescriptorForType() { + return org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.internal_static_ServerCrashStateData_descriptor; + } + + public org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData getDefaultInstanceForType() { + return org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.getDefaultInstance(); + } + + public org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData build() { + org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData result = buildPartial(); + if (!result.isInitialized()) { + 
throw newUninitializedMessageException(result); + } + return result; + } + + public org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData buildPartial() { + org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData result = new org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData(this); + int from_bitField0_ = bitField0_; + int to_bitField0_ = 0; + if (((from_bitField0_ & 0x00000001) == 0x00000001)) { + to_bitField0_ |= 0x00000001; + } + if (serverNameBuilder_ == null) { + result.serverName_ = serverName_; + } else { + result.serverName_ = serverNameBuilder_.build(); + } + if (((from_bitField0_ & 0x00000002) == 0x00000002)) { + to_bitField0_ |= 0x00000002; + } + result.distributedLogReplay_ = distributedLogReplay_; + if (regionsOnCrashedServerBuilder_ == null) { + if (((bitField0_ & 0x00000004) == 0x00000004)) { + regionsOnCrashedServer_ = java.util.Collections.unmodifiableList(regionsOnCrashedServer_); + bitField0_ = (bitField0_ & ~0x00000004); + } + result.regionsOnCrashedServer_ = regionsOnCrashedServer_; + } else { + result.regionsOnCrashedServer_ = regionsOnCrashedServerBuilder_.build(); + } + if (regionsToAssignBuilder_ == null) { + if (((bitField0_ & 0x00000008) == 0x00000008)) { + regionsToAssign_ = java.util.Collections.unmodifiableList(regionsToAssign_); + bitField0_ = (bitField0_ & ~0x00000008); + } + result.regionsToAssign_ = regionsToAssign_; + } else { + result.regionsToAssign_ = regionsToAssignBuilder_.build(); + } + if (((from_bitField0_ & 0x00000010) == 0x00000010)) { + to_bitField0_ |= 0x00000004; + } + result.carryingMeta_ = carryingMeta_; + if (((from_bitField0_ & 0x00000020) == 0x00000020)) { + to_bitField0_ |= 0x00000008; + } + result.shouldSplitWal_ = shouldSplitWal_; + result.bitField0_ = to_bitField0_; + onBuilt(); + return result; + } + + public Builder mergeFrom(com.google.protobuf.Message other) { + if (other instanceof org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData) { + return mergeFrom((org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData)other); + } else { + super.mergeFrom(other); + return this; + } + } + + public Builder mergeFrom(org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData other) { + if (other == org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData.getDefaultInstance()) return this; + if (other.hasServerName()) { + mergeServerName(other.getServerName()); + } + if (other.hasDistributedLogReplay()) { + setDistributedLogReplay(other.getDistributedLogReplay()); + } + if (regionsOnCrashedServerBuilder_ == null) { + if (!other.regionsOnCrashedServer_.isEmpty()) { + if (regionsOnCrashedServer_.isEmpty()) { + regionsOnCrashedServer_ = other.regionsOnCrashedServer_; + bitField0_ = (bitField0_ & ~0x00000004); + } else { + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.addAll(other.regionsOnCrashedServer_); + } + onChanged(); + } + } else { + if (!other.regionsOnCrashedServer_.isEmpty()) { + if (regionsOnCrashedServerBuilder_.isEmpty()) { + regionsOnCrashedServerBuilder_.dispose(); + regionsOnCrashedServerBuilder_ = null; + regionsOnCrashedServer_ = other.regionsOnCrashedServer_; + bitField0_ = (bitField0_ & ~0x00000004); + regionsOnCrashedServerBuilder_ = + com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders ? 
+ getRegionsOnCrashedServerFieldBuilder() : null; + } else { + regionsOnCrashedServerBuilder_.addAllMessages(other.regionsOnCrashedServer_); + } + } + } + if (regionsToAssignBuilder_ == null) { + if (!other.regionsToAssign_.isEmpty()) { + if (regionsToAssign_.isEmpty()) { + regionsToAssign_ = other.regionsToAssign_; + bitField0_ = (bitField0_ & ~0x00000008); + } else { + ensureRegionsToAssignIsMutable(); + regionsToAssign_.addAll(other.regionsToAssign_); + } + onChanged(); + } + } else { + if (!other.regionsToAssign_.isEmpty()) { + if (regionsToAssignBuilder_.isEmpty()) { + regionsToAssignBuilder_.dispose(); + regionsToAssignBuilder_ = null; + regionsToAssign_ = other.regionsToAssign_; + bitField0_ = (bitField0_ & ~0x00000008); + regionsToAssignBuilder_ = + com.google.protobuf.GeneratedMessage.alwaysUseFieldBuilders ? + getRegionsToAssignFieldBuilder() : null; + } else { + regionsToAssignBuilder_.addAllMessages(other.regionsToAssign_); + } + } + } + if (other.hasCarryingMeta()) { + setCarryingMeta(other.getCarryingMeta()); + } + if (other.hasShouldSplitWal()) { + setShouldSplitWal(other.getShouldSplitWal()); + } + this.mergeUnknownFields(other.getUnknownFields()); + return this; + } + + public final boolean isInitialized() { + if (!hasServerName()) { + + return false; + } + if (!getServerName().isInitialized()) { + + return false; + } + for (int i = 0; i < getRegionsOnCrashedServerCount(); i++) { + if (!getRegionsOnCrashedServer(i).isInitialized()) { + + return false; + } + } + for (int i = 0; i < getRegionsToAssignCount(); i++) { + if (!getRegionsToAssign(i).isInitialized()) { + + return false; + } + } + return true; + } + + public Builder mergeFrom( + com.google.protobuf.CodedInputStream input, + com.google.protobuf.ExtensionRegistryLite extensionRegistry) + throws java.io.IOException { + org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData parsedMessage = null; + try { + parsedMessage = PARSER.parsePartialFrom(input, extensionRegistry); + } catch (com.google.protobuf.InvalidProtocolBufferException e) { + parsedMessage = (org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData) e.getUnfinishedMessage(); + throw e; + } finally { + if (parsedMessage != null) { + mergeFrom(parsedMessage); + } + } + return this; + } + private int bitField0_; + + // required .ServerName server_name = 1; + private org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName serverName_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance(); + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder> serverNameBuilder_; + /** + * required .ServerName server_name = 1; + */ + public boolean hasServerName() { + return ((bitField0_ & 0x00000001) == 0x00000001); + } + /** + * required .ServerName server_name = 1; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName getServerName() { + if (serverNameBuilder_ == null) { + return serverName_; + } else { + return serverNameBuilder_.getMessage(); + } + } + /** + * required .ServerName server_name = 1; + */ + public Builder setServerName(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName value) { + if (serverNameBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + serverName_ = value; + 
onChanged(); + } else { + serverNameBuilder_.setMessage(value); + } + bitField0_ |= 0x00000001; + return this; + } + /** + * required .ServerName server_name = 1; + */ + public Builder setServerName( + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder builderForValue) { + if (serverNameBuilder_ == null) { + serverName_ = builderForValue.build(); + onChanged(); + } else { + serverNameBuilder_.setMessage(builderForValue.build()); + } + bitField0_ |= 0x00000001; + return this; + } + /** + * required .ServerName server_name = 1; + */ + public Builder mergeServerName(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName value) { + if (serverNameBuilder_ == null) { + if (((bitField0_ & 0x00000001) == 0x00000001) && + serverName_ != org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance()) { + serverName_ = + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.newBuilder(serverName_).mergeFrom(value).buildPartial(); + } else { + serverName_ = value; + } + onChanged(); + } else { + serverNameBuilder_.mergeFrom(value); + } + bitField0_ |= 0x00000001; + return this; + } + /** + * required .ServerName server_name = 1; + */ + public Builder clearServerName() { + if (serverNameBuilder_ == null) { + serverName_ = org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.getDefaultInstance(); + onChanged(); + } else { + serverNameBuilder_.clear(); + } + bitField0_ = (bitField0_ & ~0x00000001); + return this; + } + /** + * required .ServerName server_name = 1; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder getServerNameBuilder() { + bitField0_ |= 0x00000001; + onChanged(); + return getServerNameFieldBuilder().getBuilder(); + } + /** + * required .ServerName server_name = 1; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder getServerNameOrBuilder() { + if (serverNameBuilder_ != null) { + return serverNameBuilder_.getMessageOrBuilder(); + } else { + return serverName_; + } + } + /** + * required .ServerName server_name = 1; + */ + private com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder> + getServerNameFieldBuilder() { + if (serverNameBuilder_ == null) { + serverNameBuilder_ = new com.google.protobuf.SingleFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerName.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.ServerNameOrBuilder>( + serverName_, + getParentForChildren(), + isClean()); + serverName_ = null; + } + return serverNameBuilder_; + } + + // optional bool distributed_log_replay = 2; + private boolean distributedLogReplay_ ; + /** + * optional bool distributed_log_replay = 2; + */ + public boolean hasDistributedLogReplay() { + return ((bitField0_ & 0x00000002) == 0x00000002); + } + /** + * optional bool distributed_log_replay = 2; + */ + public boolean getDistributedLogReplay() { + return distributedLogReplay_; + } + /** + * optional bool distributed_log_replay = 2; + */ + public Builder setDistributedLogReplay(boolean value) { + bitField0_ |= 0x00000002; + distributedLogReplay_ = value; + onChanged(); + return this; + } + /** + * optional bool distributed_log_replay = 2; + */ + public Builder clearDistributedLogReplay() { 
+ bitField0_ = (bitField0_ & ~0x00000002); + distributedLogReplay_ = false; + onChanged(); + return this; + } + + // repeated .RegionInfo regions_on_crashed_server = 3; + private java.util.List regionsOnCrashedServer_ = + java.util.Collections.emptyList(); + private void ensureRegionsOnCrashedServerIsMutable() { + if (!((bitField0_ & 0x00000004) == 0x00000004)) { + regionsOnCrashedServer_ = new java.util.ArrayList(regionsOnCrashedServer_); + bitField0_ |= 0x00000004; + } + } + + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder> regionsOnCrashedServerBuilder_; + + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public java.util.List getRegionsOnCrashedServerList() { + if (regionsOnCrashedServerBuilder_ == null) { + return java.util.Collections.unmodifiableList(regionsOnCrashedServer_); + } else { + return regionsOnCrashedServerBuilder_.getMessageList(); + } + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public int getRegionsOnCrashedServerCount() { + if (regionsOnCrashedServerBuilder_ == null) { + return regionsOnCrashedServer_.size(); + } else { + return regionsOnCrashedServerBuilder_.getCount(); + } + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo getRegionsOnCrashedServer(int index) { + if (regionsOnCrashedServerBuilder_ == null) { + return regionsOnCrashedServer_.get(index); + } else { + return regionsOnCrashedServerBuilder_.getMessage(index); + } + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder setRegionsOnCrashedServer( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo value) { + if (regionsOnCrashedServerBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.set(index, value); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.setMessage(index, value); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder setRegionsOnCrashedServer( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder builderForValue) { + if (regionsOnCrashedServerBuilder_ == null) { + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.set(index, builderForValue.build()); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.setMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder addRegionsOnCrashedServer(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo value) { + if (regionsOnCrashedServerBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.add(value); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.addMessage(value); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder addRegionsOnCrashedServer( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo value) { + if (regionsOnCrashedServerBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + 
ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.add(index, value); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.addMessage(index, value); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder addRegionsOnCrashedServer( + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder builderForValue) { + if (regionsOnCrashedServerBuilder_ == null) { + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.add(builderForValue.build()); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.addMessage(builderForValue.build()); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder addRegionsOnCrashedServer( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder builderForValue) { + if (regionsOnCrashedServerBuilder_ == null) { + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.add(index, builderForValue.build()); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.addMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder addAllRegionsOnCrashedServer( + java.lang.Iterable values) { + if (regionsOnCrashedServerBuilder_ == null) { + ensureRegionsOnCrashedServerIsMutable(); + super.addAll(values, regionsOnCrashedServer_); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.addAllMessages(values); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder clearRegionsOnCrashedServer() { + if (regionsOnCrashedServerBuilder_ == null) { + regionsOnCrashedServer_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000004); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.clear(); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public Builder removeRegionsOnCrashedServer(int index) { + if (regionsOnCrashedServerBuilder_ == null) { + ensureRegionsOnCrashedServerIsMutable(); + regionsOnCrashedServer_.remove(index); + onChanged(); + } else { + regionsOnCrashedServerBuilder_.remove(index); + } + return this; + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder getRegionsOnCrashedServerBuilder( + int index) { + return getRegionsOnCrashedServerFieldBuilder().getBuilder(index); + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder getRegionsOnCrashedServerOrBuilder( + int index) { + if (regionsOnCrashedServerBuilder_ == null) { + return regionsOnCrashedServer_.get(index); } else { + return regionsOnCrashedServerBuilder_.getMessageOrBuilder(index); + } + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public java.util.List + getRegionsOnCrashedServerOrBuilderList() { + if (regionsOnCrashedServerBuilder_ != null) { + return regionsOnCrashedServerBuilder_.getMessageOrBuilderList(); + } else { + return java.util.Collections.unmodifiableList(regionsOnCrashedServer_); + } + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder addRegionsOnCrashedServerBuilder() { + return getRegionsOnCrashedServerFieldBuilder().addBuilder( + 
org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.getDefaultInstance()); + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder addRegionsOnCrashedServerBuilder( + int index) { + return getRegionsOnCrashedServerFieldBuilder().addBuilder( + index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.getDefaultInstance()); + } + /** + * repeated .RegionInfo regions_on_crashed_server = 3; + */ + public java.util.List + getRegionsOnCrashedServerBuilderList() { + return getRegionsOnCrashedServerFieldBuilder().getBuilderList(); + } + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder> + getRegionsOnCrashedServerFieldBuilder() { + if (regionsOnCrashedServerBuilder_ == null) { + regionsOnCrashedServerBuilder_ = new com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder>( + regionsOnCrashedServer_, + ((bitField0_ & 0x00000004) == 0x00000004), + getParentForChildren(), + isClean()); + regionsOnCrashedServer_ = null; + } + return regionsOnCrashedServerBuilder_; + } + + // repeated .RegionInfo regions_to_assign = 4; + private java.util.List regionsToAssign_ = + java.util.Collections.emptyList(); + private void ensureRegionsToAssignIsMutable() { + if (!((bitField0_ & 0x00000008) == 0x00000008)) { + regionsToAssign_ = new java.util.ArrayList(regionsToAssign_); + bitField0_ |= 0x00000008; + } + } + + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder> regionsToAssignBuilder_; + + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public java.util.List getRegionsToAssignList() { + if (regionsToAssignBuilder_ == null) { + return java.util.Collections.unmodifiableList(regionsToAssign_); + } else { + return regionsToAssignBuilder_.getMessageList(); + } + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public int getRegionsToAssignCount() { + if (regionsToAssignBuilder_ == null) { + return regionsToAssign_.size(); + } else { + return regionsToAssignBuilder_.getCount(); + } + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo getRegionsToAssign(int index) { + if (regionsToAssignBuilder_ == null) { + return regionsToAssign_.get(index); + } else { + return regionsToAssignBuilder_.getMessage(index); + } + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder setRegionsToAssign( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo value) { + if (regionsToAssignBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureRegionsToAssignIsMutable(); + regionsToAssign_.set(index, value); + onChanged(); + } else { + regionsToAssignBuilder_.setMessage(index, value); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder 
setRegionsToAssign( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder builderForValue) { + if (regionsToAssignBuilder_ == null) { + ensureRegionsToAssignIsMutable(); + regionsToAssign_.set(index, builderForValue.build()); + onChanged(); + } else { + regionsToAssignBuilder_.setMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder addRegionsToAssign(org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo value) { + if (regionsToAssignBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureRegionsToAssignIsMutable(); + regionsToAssign_.add(value); + onChanged(); + } else { + regionsToAssignBuilder_.addMessage(value); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder addRegionsToAssign( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo value) { + if (regionsToAssignBuilder_ == null) { + if (value == null) { + throw new NullPointerException(); + } + ensureRegionsToAssignIsMutable(); + regionsToAssign_.add(index, value); + onChanged(); + } else { + regionsToAssignBuilder_.addMessage(index, value); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder addRegionsToAssign( + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder builderForValue) { + if (regionsToAssignBuilder_ == null) { + ensureRegionsToAssignIsMutable(); + regionsToAssign_.add(builderForValue.build()); + onChanged(); + } else { + regionsToAssignBuilder_.addMessage(builderForValue.build()); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder addRegionsToAssign( + int index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder builderForValue) { + if (regionsToAssignBuilder_ == null) { + ensureRegionsToAssignIsMutable(); + regionsToAssign_.add(index, builderForValue.build()); + onChanged(); + } else { + regionsToAssignBuilder_.addMessage(index, builderForValue.build()); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder addAllRegionsToAssign( + java.lang.Iterable values) { + if (regionsToAssignBuilder_ == null) { + ensureRegionsToAssignIsMutable(); + super.addAll(values, regionsToAssign_); + onChanged(); + } else { + regionsToAssignBuilder_.addAllMessages(values); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder clearRegionsToAssign() { + if (regionsToAssignBuilder_ == null) { + regionsToAssign_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000008); + onChanged(); + } else { + regionsToAssignBuilder_.clear(); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public Builder removeRegionsToAssign(int index) { + if (regionsToAssignBuilder_ == null) { + ensureRegionsToAssignIsMutable(); + regionsToAssign_.remove(index); + onChanged(); + } else { + regionsToAssignBuilder_.remove(index); + } + return this; + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder getRegionsToAssignBuilder( + int index) { + return getRegionsToAssignFieldBuilder().getBuilder(index); + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder 
getRegionsToAssignOrBuilder( + int index) { + if (regionsToAssignBuilder_ == null) { + return regionsToAssign_.get(index); } else { + return regionsToAssignBuilder_.getMessageOrBuilder(index); + } + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public java.util.List + getRegionsToAssignOrBuilderList() { + if (regionsToAssignBuilder_ != null) { + return regionsToAssignBuilder_.getMessageOrBuilderList(); + } else { + return java.util.Collections.unmodifiableList(regionsToAssign_); + } + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder addRegionsToAssignBuilder() { + return getRegionsToAssignFieldBuilder().addBuilder( + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.getDefaultInstance()); + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder addRegionsToAssignBuilder( + int index) { + return getRegionsToAssignFieldBuilder().addBuilder( + index, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.getDefaultInstance()); + } + /** + * repeated .RegionInfo regions_to_assign = 4; + */ + public java.util.List + getRegionsToAssignBuilderList() { + return getRegionsToAssignFieldBuilder().getBuilderList(); + } + private com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder> + getRegionsToAssignFieldBuilder() { + if (regionsToAssignBuilder_ == null) { + regionsToAssignBuilder_ = new com.google.protobuf.RepeatedFieldBuilder< + org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo.Builder, org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfoOrBuilder>( + regionsToAssign_, + ((bitField0_ & 0x00000008) == 0x00000008), + getParentForChildren(), + isClean()); + regionsToAssign_ = null; + } + return regionsToAssignBuilder_; + } + + // optional bool carrying_meta = 5; + private boolean carryingMeta_ ; + /** + * optional bool carrying_meta = 5; + */ + public boolean hasCarryingMeta() { + return ((bitField0_ & 0x00000010) == 0x00000010); + } + /** + * optional bool carrying_meta = 5; + */ + public boolean getCarryingMeta() { + return carryingMeta_; + } + /** + * optional bool carrying_meta = 5; + */ + public Builder setCarryingMeta(boolean value) { + bitField0_ |= 0x00000010; + carryingMeta_ = value; + onChanged(); + return this; + } + /** + * optional bool carrying_meta = 5; + */ + public Builder clearCarryingMeta() { + bitField0_ = (bitField0_ & ~0x00000010); + carryingMeta_ = false; + onChanged(); + return this; + } + + // optional bool should_split_wal = 6 [default = true]; + private boolean shouldSplitWal_ = true; + /** + * optional bool should_split_wal = 6 [default = true]; + */ + public boolean hasShouldSplitWal() { + return ((bitField0_ & 0x00000020) == 0x00000020); + } + /** + * optional bool should_split_wal = 6 [default = true]; + */ + public boolean getShouldSplitWal() { + return shouldSplitWal_; + } + /** + * optional bool should_split_wal = 6 [default = true]; + */ + public Builder setShouldSplitWal(boolean value) { + bitField0_ |= 0x00000020; + shouldSplitWal_ = value; + onChanged(); + return this; + } + /** + * optional bool should_split_wal = 6 [default = true]; + */ + 
public Builder clearShouldSplitWal() { + bitField0_ = (bitField0_ & ~0x00000020); + shouldSplitWal_ = true; + onChanged(); + return this; + } + + // @@protoc_insertion_point(builder_scope:ServerCrashStateData) + } + + static { + defaultInstance = new ServerCrashStateData(true); + defaultInstance.initFields(); + } + + // @@protoc_insertion_point(class_scope:ServerCrashStateData) + } + + private static com.google.protobuf.Descriptors.Descriptor + internal_static_CreateTableStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_CreateTableStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_ModifyTableStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_ModifyTableStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_TruncateTableStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_TruncateTableStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_DeleteTableStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_DeleteTableStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_AddColumnFamilyStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_AddColumnFamilyStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_ModifyColumnFamilyStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_ModifyColumnFamilyStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_DeleteColumnFamilyStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_DeleteColumnFamilyStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_EnableTableStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_EnableTableStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_DisableTableStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_DisableTableStateData_fieldAccessorTable; + private static com.google.protobuf.Descriptors.Descriptor + internal_static_ServerCrashStateData_descriptor; + private static + com.google.protobuf.GeneratedMessage.FieldAccessorTable + internal_static_ServerCrashStateData_fieldAccessorTable; + + public static com.google.protobuf.Descriptors.FileDescriptor + getDescriptor() { + return descriptor; + } + private static com.google.protobuf.Descriptors.FileDescriptor + descriptor; + static { + java.lang.String[] descriptorData = { + "\n\025MasterProcedure.proto\032\013HBase.proto\032\tRP" + + "C.proto\"\201\001\n\024CreateTableStateData\022#\n\tuser" + + "_info\030\001 \002(\0132\020.UserInformation\022\"\n\014table_s" + + "chema\030\002 \002(\0132\014.TableSchema\022 \n\013region_info" + + "\030\003 \003(\0132\013.RegionInfo\"\277\001\n\024ModifyTableState" + + "Data\022#\n\tuser_info\030\001 \002(\0132\020.UserInformatio" + + 
"n\022-\n\027unmodified_table_schema\030\002 \001(\0132\014.Tab" + + "leSchema\022+\n\025modified_table_schema\030\003 \002(\0132" + + "\014.TableSchema\022&\n\036delete_column_family_in" + + "_modify\030\004 \002(\010\"\274\001\n\026TruncateTableStateData", + "\022#\n\tuser_info\030\001 \002(\0132\020.UserInformation\022\027\n" + + "\017preserve_splits\030\002 \002(\010\022\036\n\ntable_name\030\003 \001" + + "(\0132\n.TableName\022\"\n\014table_schema\030\004 \001(\0132\014.T" + + "ableSchema\022 \n\013region_info\030\005 \003(\0132\013.Region" + + "Info\"}\n\024DeleteTableStateData\022#\n\tuser_inf" + + "o\030\001 \002(\0132\020.UserInformation\022\036\n\ntable_name\030" + + "\002 \002(\0132\n.TableName\022 \n\013region_info\030\003 \003(\0132\013" + + ".RegionInfo\"\300\001\n\030AddColumnFamilyStateData" + + "\022#\n\tuser_info\030\001 \002(\0132\020.UserInformation\022\036\n" + + "\ntable_name\030\002 \002(\0132\n.TableName\0220\n\023columnf", + "amily_schema\030\003 \002(\0132\023.ColumnFamilySchema\022" + + "-\n\027unmodified_table_schema\030\004 \001(\0132\014.Table" + + "Schema\"\303\001\n\033ModifyColumnFamilyStateData\022#" + + "\n\tuser_info\030\001 \002(\0132\020.UserInformation\022\036\n\nt" + + "able_name\030\002 \002(\0132\n.TableName\0220\n\023columnfam" + + "ily_schema\030\003 \002(\0132\023.ColumnFamilySchema\022-\n" + + "\027unmodified_table_schema\030\004 \001(\0132\014.TableSc" + + "hema\"\254\001\n\033DeleteColumnFamilyStateData\022#\n\t" + + "user_info\030\001 \002(\0132\020.UserInformation\022\036\n\ntab" + + "le_name\030\002 \002(\0132\n.TableName\022\031\n\021columnfamil", + "y_name\030\003 \002(\014\022-\n\027unmodified_table_schema\030" + + "\004 \001(\0132\014.TableSchema\"{\n\024EnableTableStateD" + + "ata\022#\n\tuser_info\030\001 \002(\0132\020.UserInformation" + + "\022\036\n\ntable_name\030\002 \002(\0132\n.TableName\022\036\n\026skip" + + "_table_state_check\030\003 \002(\010\"|\n\025DisableTable" + + "StateData\022#\n\tuser_info\030\001 \002(\0132\020.UserInfor" + + "mation\022\036\n\ntable_name\030\002 \002(\0132\n.TableName\022\036" + + "\n\026skip_table_state_check\030\003 \002(\010\"\347\001\n\024Serve" + + "rCrashStateData\022 \n\013server_name\030\001 \002(\0132\013.S" + + "erverName\022\036\n\026distributed_log_replay\030\002 \001(", + "\010\022.\n\031regions_on_crashed_server\030\003 \003(\0132\013.R" + + "egionInfo\022&\n\021regions_to_assign\030\004 \003(\0132\013.R" + + "egionInfo\022\025\n\rcarrying_meta\030\005 \001(\010\022\036\n\020shou" + + "ld_split_wal\030\006 \001(\010:\004true*\330\001\n\020CreateTable" + + "State\022\036\n\032CREATE_TABLE_PRE_OPERATION\020\001\022 \n" + + "\034CREATE_TABLE_WRITE_FS_LAYOUT\020\002\022\034\n\030CREAT" + + "E_TABLE_ADD_TO_META\020\003\022\037\n\033CREATE_TABLE_AS" + + "SIGN_REGIONS\020\004\022\"\n\036CREATE_TABLE_UPDATE_DE" + + "SC_CACHE\020\005\022\037\n\033CREATE_TABLE_POST_OPERATIO" + + "N\020\006*\207\002\n\020ModifyTableState\022\030\n\024MODIFY_TABLE", + "_PREPARE\020\001\022\036\n\032MODIFY_TABLE_PRE_OPERATION" + + "\020\002\022(\n$MODIFY_TABLE_UPDATE_TABLE_DESCRIPT" + + "OR\020\003\022&\n\"MODIFY_TABLE_REMOVE_REPLICA_COLU" + + "MN\020\004\022!\n\035MODIFY_TABLE_DELETE_FS_LAYOUT\020\005\022" + + "\037\n\033MODIFY_TABLE_POST_OPERATION\020\006\022#\n\037MODI" + + "FY_TABLE_REOPEN_ALL_REGIONS\020\007*\212\002\n\022Trunca" + + "teTableState\022 \n\034TRUNCATE_TABLE_PRE_OPERA" + + "TION\020\001\022#\n\037TRUNCATE_TABLE_REMOVE_FROM_MET" + + 
"A\020\002\022\"\n\036TRUNCATE_TABLE_CLEAR_FS_LAYOUT\020\003\022" + + "#\n\037TRUNCATE_TABLE_CREATE_FS_LAYOUT\020\004\022\036\n\032", + "TRUNCATE_TABLE_ADD_TO_META\020\005\022!\n\035TRUNCATE" + + "_TABLE_ASSIGN_REGIONS\020\006\022!\n\035TRUNCATE_TABL" + + "E_POST_OPERATION\020\007*\337\001\n\020DeleteTableState\022" + + "\036\n\032DELETE_TABLE_PRE_OPERATION\020\001\022!\n\035DELET" + + "E_TABLE_REMOVE_FROM_META\020\002\022 \n\034DELETE_TAB" + + "LE_CLEAR_FS_LAYOUT\020\003\022\"\n\036DELETE_TABLE_UPD" + + "ATE_DESC_CACHE\020\004\022!\n\035DELETE_TABLE_UNASSIG" + + "N_REGIONS\020\005\022\037\n\033DELETE_TABLE_POST_OPERATI" + + "ON\020\006*\331\001\n\024AddColumnFamilyState\022\035\n\031ADD_COL" + + "UMN_FAMILY_PREPARE\020\001\022#\n\037ADD_COLUMN_FAMIL", + "Y_PRE_OPERATION\020\002\022-\n)ADD_COLUMN_FAMILY_U" + + "PDATE_TABLE_DESCRIPTOR\020\003\022$\n ADD_COLUMN_F" + + "AMILY_POST_OPERATION\020\004\022(\n$ADD_COLUMN_FAM" + + "ILY_REOPEN_ALL_REGIONS\020\005*\353\001\n\027ModifyColum" + + "nFamilyState\022 \n\034MODIFY_COLUMN_FAMILY_PRE" + + "PARE\020\001\022&\n\"MODIFY_COLUMN_FAMILY_PRE_OPERA" + + "TION\020\002\0220\n,MODIFY_COLUMN_FAMILY_UPDATE_TA" + + "BLE_DESCRIPTOR\020\003\022\'\n#MODIFY_COLUMN_FAMILY" + + "_POST_OPERATION\020\004\022+\n\'MODIFY_COLUMN_FAMIL" + + "Y_REOPEN_ALL_REGIONS\020\005*\226\002\n\027DeleteColumnF", + "amilyState\022 \n\034DELETE_COLUMN_FAMILY_PREPA" + + "RE\020\001\022&\n\"DELETE_COLUMN_FAMILY_PRE_OPERATI" + + "ON\020\002\0220\n,DELETE_COLUMN_FAMILY_UPDATE_TABL" + + "E_DESCRIPTOR\020\003\022)\n%DELETE_COLUMN_FAMILY_D" + + "ELETE_FS_LAYOUT\020\004\022\'\n#DELETE_COLUMN_FAMIL" + + "Y_POST_OPERATION\020\005\022+\n\'DELETE_COLUMN_FAMI" + + "LY_REOPEN_ALL_REGIONS\020\006*\350\001\n\020EnableTableS" + + "tate\022\030\n\024ENABLE_TABLE_PREPARE\020\001\022\036\n\032ENABLE" + + "_TABLE_PRE_OPERATION\020\002\022)\n%ENABLE_TABLE_S" + + "ET_ENABLING_TABLE_STATE\020\003\022$\n ENABLE_TABL", + "E_MARK_REGIONS_ONLINE\020\004\022(\n$ENABLE_TABLE_" + + "SET_ENABLED_TABLE_STATE\020\005\022\037\n\033ENABLE_TABL" + + "E_POST_OPERATION\020\006*\362\001\n\021DisableTableState" + + "\022\031\n\025DISABLE_TABLE_PREPARE\020\001\022\037\n\033DISABLE_T" + + "ABLE_PRE_OPERATION\020\002\022+\n\'DISABLE_TABLE_SE" + + "T_DISABLING_TABLE_STATE\020\003\022&\n\"DISABLE_TAB" + + "LE_MARK_REGIONS_OFFLINE\020\004\022*\n&DISABLE_TAB" + + "LE_SET_DISABLED_TABLE_STATE\020\005\022 \n\034DISABLE" + + "_TABLE_POST_OPERATION\020\006*\305\002\n\020ServerCrashS" + + "tate\022\026\n\022SERVER_CRASH_START\020\001\022\035\n\031SERVER_C", + "RASH_PROCESS_META\020\002\022\034\n\030SERVER_CRASH_GET_" + + "REGIONS\020\003\022\036\n\032SERVER_CRASH_NO_SPLIT_LOGS\020" + + "\004\022\033\n\027SERVER_CRASH_SPLIT_LOGS\020\005\022#\n\037SERVER" + + "_CRASH_PREPARE_LOG_REPLAY\020\006\022\'\n#SERVER_CR" + + "ASH_CALC_REGIONS_TO_ASSIGN\020\007\022\027\n\023SERVER_C" + + "RASH_ASSIGN\020\010\022\037\n\033SERVER_CRASH_WAIT_ON_AS" + + "SIGN\020\t\022\027\n\023SERVER_CRASH_FINISH\020dBK\n*org.a" + + "pache.hadoop.hbase.protobuf.generatedB\025M" + + "asterProcedureProtosH\001\210\001\001\240\001\001" + }; + com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = + new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { + public com.google.protobuf.ExtensionRegistry assignDescriptors( + com.google.protobuf.Descriptors.FileDescriptor root) { + descriptor = root; + internal_static_CreateTableStateData_descriptor = + getDescriptor().getMessageTypes().get(0); + 
internal_static_CreateTableStateData_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_CreateTableStateData_descriptor, + new java.lang.String[] { "UserInfo", "TableSchema", "RegionInfo", }); + internal_static_ModifyTableStateData_descriptor = + getDescriptor().getMessageTypes().get(1); + internal_static_ModifyTableStateData_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_ModifyTableStateData_descriptor, + new java.lang.String[] { "UserInfo", "UnmodifiedTableSchema", "ModifiedTableSchema", "DeleteColumnFamilyInModify", }); + internal_static_TruncateTableStateData_descriptor = + getDescriptor().getMessageTypes().get(2); + internal_static_TruncateTableStateData_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_TruncateTableStateData_descriptor, + new java.lang.String[] { "UserInfo", "PreserveSplits", "TableName", "TableSchema", "RegionInfo", }); + internal_static_DeleteTableStateData_descriptor = + getDescriptor().getMessageTypes().get(3); internal_static_DeleteTableStateData_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_DeleteTableStateData_descriptor, @@ -11409,6 +13173,12 @@ public final class MasterProcedureProtos { com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_DisableTableStateData_descriptor, new java.lang.String[] { "UserInfo", "TableName", "SkipTableStateCheck", }); + internal_static_ServerCrashStateData_descriptor = + getDescriptor().getMessageTypes().get(9); + internal_static_ServerCrashStateData_fieldAccessorTable = new + com.google.protobuf.GeneratedMessage.FieldAccessorTable( + internal_static_ServerCrashStateData_descriptor, + new java.lang.String[] { "ServerName", "DistributedLogReplay", "RegionsOnCrashedServer", "RegionsToAssign", "CarryingMeta", "ShouldSplitWal", }); return null; } }; diff --git a/hbase-protocol/src/main/protobuf/MasterProcedure.proto b/hbase-protocol/src/main/protobuf/MasterProcedure.proto index e1c6880..5e94721 100644 --- a/hbase-protocol/src/main/protobuf/MasterProcedure.proto +++ b/hbase-protocol/src/main/protobuf/MasterProcedure.proto @@ -183,3 +183,25 @@ message DisableTableStateData { required TableName table_name = 2; required bool skip_table_state_check = 3; } + +message ServerCrashStateData { + required ServerName server_name = 1; + optional bool distributed_log_replay = 2; + repeated RegionInfo regions_on_crashed_server = 3; + repeated RegionInfo regions_to_assign = 4; + optional bool carrying_meta = 5; + optional bool should_split_wal = 6 [default = true]; +} + +enum ServerCrashState { + SERVER_CRASH_START = 1; + SERVER_CRASH_PROCESS_META = 2; + SERVER_CRASH_GET_REGIONS = 3; + SERVER_CRASH_NO_SPLIT_LOGS = 4; + SERVER_CRASH_SPLIT_LOGS = 5; + SERVER_CRASH_PREPARE_LOG_REPLAY = 6; + SERVER_CRASH_CALC_REGIONS_TO_ASSIGN = 7; + SERVER_CRASH_ASSIGN = 8; + SERVER_CRASH_WAIT_ON_ASSIGN = 9; + SERVER_CRASH_FINISH = 100; +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java index acdcf60..556a143 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZKSplitLogManagerCoordination.java @@ -69,7 +69,7 @@ import org.apache.zookeeper.data.Stat; 
/** * ZooKeeper based implementation of - * {@link org.apache.hadoop.hbase.master.SplitLogManagerCoordination} + * {@link SplitLogManagerCoordination} */ @InterfaceAudience.Private public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements @@ -647,7 +647,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements ZKUtil.createSetData(this.watcher, nodePath, ZKUtil.regionSequenceIdsToByteArray(lastSequenceId, null)); if (LOG.isDebugEnabled()) { - LOG.debug("Marked " + regionEncodeName + " as recovering from " + serverName + + LOG.debug("Marked " + regionEncodeName + " recovering from " + serverName + ": " + nodePath); } // break retry loop @@ -684,7 +684,7 @@ public class ZKSplitLogManagerCoordination extends ZooKeeperListener implements /** * ZooKeeper implementation of - * {@link org.apache.hadoop.hbase.master. + * {@link org.apache.hadoop.hbase.coordination. * SplitLogManagerCoordination#removeStaleRecoveringRegions(Set)} */ @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java index 637920b..b682764 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/coordination/ZkSplitLogWorkerCoordination.java @@ -104,7 +104,7 @@ public class ZkSplitLogWorkerCoordination extends ZooKeeperListener implements @Override public void nodeChildrenChanged(String path) { if (path.equals(watcher.splitLogZNode)) { - LOG.debug("tasks arrived or departed"); + if (LOG.isTraceEnabled()) LOG.trace("tasks arrived or departed on " + path); synchronized (taskReadyLock) { taskReadySeq++; taskReadyLock.notify(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java index cd0e9d7..808a53f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/AssignmentManager.java @@ -486,8 +486,7 @@ public class AssignmentManager extends ZooKeeperListener { Set deadServers = rebuildUserRegions(); // This method will assign all user regions if a clean server startup or - // it will reconstruct master state and cleanup any leftovers from - // previous master process. + // it will reconstruct master state and cleanup any leftovers from previous master process. boolean failover = processDeadServersAndRegionsInTransition(deadServers); if (!useZKForAssignment) { @@ -502,20 +501,18 @@ public class AssignmentManager extends ZooKeeperListener { /** * Process all regions that are in transition in zookeeper and also - * processes the list of dead servers by scanning the META. + * processes the list of dead servers. * Used by master joining an cluster. If we figure this is a clean cluster * startup, will assign all user regions. - * @param deadServers - * Map of dead servers and their regions. Can be null. + * @param deadServers Set of servers that are offline probably legitimately that were carrying + * regions according to a scan of hbase:meta. Can be null. 
* @throws KeeperException * @throws IOException * @throws InterruptedException */ - boolean processDeadServersAndRegionsInTransition( - final Set deadServers) throws KeeperException, - IOException, InterruptedException, CoordinatedStateException { - List nodes = ZKUtil.listChildrenNoWatch(watcher, - watcher.assignmentZNode); + boolean processDeadServersAndRegionsInTransition(final Set deadServers) + throws KeeperException, IOException, InterruptedException, CoordinatedStateException { + List nodes = ZKUtil.listChildrenNoWatch(watcher, watcher.assignmentZNode); if (useZKForAssignment && nodes == null) { String errorMessage = "Failed to get the children from ZK"; @@ -2755,15 +2752,13 @@ public class AssignmentManager extends ZooKeeperListener { } // Generate a round-robin bulk assignment plan - Map> bulkPlan - = balancer.roundRobinAssignment(regions, servers); + Map> bulkPlan = balancer.roundRobinAssignment(regions, servers); if (bulkPlan == null) { throw new IOException("Unable to determine a plan to assign region(s)"); } processFavoredNodes(regions); - assign(regions.size(), servers.size(), - "round-robin=true", bulkPlan); + assign(regions.size(), servers.size(), "round-robin=true", bulkPlan); } private void assign(int regions, int totalServers, @@ -2903,10 +2898,8 @@ public class AssignmentManager extends ZooKeeperListener { /** * Rebuild the list of user regions and assignment information. - *
<p>
- * Returns a set of servers that are not found to be online that hosted - * some regions. - * @return set of servers not online that hosted some regions per meta + * Updates regionstates with findings as we go through list of regions. + * @return set of servers not online that hosted some regions according to a scan of hbase:meta * @throws IOException */ Set rebuildUserRegions() throws @@ -3061,22 +3054,18 @@ public class AssignmentManager extends ZooKeeperListener { } /** - * Processes list of dead servers from result of hbase:meta scan and regions in RIT - *
<p>
+ * Processes list of dead servers from result of hbase:meta scan and regions in RIT. * This is used for failover to recover the lost regions that belonged to - * RegionServers which failed while there was no active master or regions - * that were in RIT. - *
<p>
- * + * RegionServers which failed while there was no active master or are offline for whatever + * reason and for regions that were in RIT. * * @param deadServers - * The list of dead servers which failed while there was no active - * master. Can be null. + * The list of dead servers which failed while there was no active master. Can be null. * @throws IOException * @throws KeeperException */ - private void processDeadServersAndRecoverLostRegions( - Set deadServers) throws IOException, KeeperException { + private void processDeadServersAndRecoverLostRegions(Set deadServers) + throws IOException, KeeperException { if (deadServers != null && !deadServers.isEmpty()) { for (ServerName serverName: deadServers) { if (!serverManager.isServerDead(serverName)) { @@ -3098,7 +3087,7 @@ public class AssignmentManager extends ZooKeeperListener { } void processRegionInTransitionZkLess() { - // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions + // We need to send RPC call again for PENDING_OPEN/PENDING_CLOSE regions // in case the RPC call is not sent out yet before the master was shut down // since we update the state before we send the RPC call. We can't update // the state after the RPC call. Otherwise, we don't know what's happened @@ -3403,15 +3392,15 @@ public class AssignmentManager extends ZooKeeperListener { } /** - * Process shutdown server removing any assignments. + * Clean out crashed server removing any assignments. * @param sn Server that went down. * @return list of regions in transition on this server */ - public List processServerShutdown(final ServerName sn) { + public List cleanOutCrashedServerReferences(final ServerName sn) { // Clean out any existing assignment plans for this server synchronized (this.regionPlans) { - for (Iterator > i = - this.regionPlans.entrySet().iterator(); i.hasNext();) { + for (Iterator > i = this.regionPlans.entrySet().iterator(); + i.hasNext();) { Map.Entry e = i.next(); ServerName otherSn = e.getValue().getDestination(); // The name will be null if the region is planned for a random assign. @@ -3429,8 +3418,7 @@ public class AssignmentManager extends ZooKeeperListener { // We need a lock on the region as we could update it Lock lock = locker.acquireLock(encodedName); try { - RegionState regionState = - regionStates.getRegionTransitionState(encodedName); + RegionState regionState = regionStates.getRegionTransitionState(encodedName); if (regionState == null || (regionState.getServerName() != null && !regionState.isOnServer(sn)) || !(regionState.isFailedClose() || regionState.isOffline() @@ -3635,8 +3623,7 @@ public class AssignmentManager extends ZooKeeperListener { } } - private void onRegionOpen( - final HRegionInfo hri, final ServerName sn, long openSeqNum) { + private void onRegionOpen(final HRegionInfo hri, final ServerName sn, long openSeqNum) { regionOnline(hri, sn, openSeqNum); if (useZKForAssignment) { try { diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java index 83b12dd..8b16b00 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/DeadServer.java @@ -38,6 +38,7 @@ import java.util.Set; /** * Class to hold dead servers list and utility querying dead server list. + * On znode expiration, servers are added here. 
*/ @InterfaceAudience.Private public class DeadServer { @@ -115,7 +116,7 @@ public class DeadServer { } public synchronized void finish(ServerName sn) { - LOG.debug("Finished processing " + sn); + if (LOG.isDebugEnabled()) LOG.debug("Finished " + sn + "; numProcessing=" + this.numProcessing); this.numProcessing--; } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java index 052cbcf..5409fd7 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/HMaster.java @@ -269,7 +269,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { volatile boolean serviceStarted = false; // flag set after we complete assignMeta. - private volatile boolean serverShutdownHandlerEnabled = false; + private volatile boolean serverCrashProcessingEnabled = false; LoadBalancer balancer; private BalancerChore balancerChore; @@ -665,11 +665,8 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // get a list for previously failed RS which need log splitting work // we recover hbase:meta region servers inside master initialization and // handle other failed servers in SSH in order to start up master node ASAP - Set previouslyFailedServers = this.fileSystemManager - .getFailedServersFromLogFolders(); - - // remove stale recovering regions from previous run - this.fileSystemManager.removeStaleRecoveringRegionsFromZK(previouslyFailedServers); + Set previouslyFailedServers = + this.fileSystemManager.getFailedServersFromLogFolders(); // log splitting for hbase:meta server ServerName oldMetaServerLocation = metaTableLocator.getMetaRegionLocation(this.getZooKeeper()); @@ -703,14 +700,14 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // Check if master is shutting down because of some issue // in initializing the regionserver or the balancer. - if(isStopped()) return; + if (isStopped()) return; // Make sure meta assigned before proceeding. status.setStatus("Assigning Meta Region"); assignMeta(status, previouslyFailedMetaRSs, HRegionInfo.DEFAULT_REPLICA_ID); // check if master is shutting down because above assignMeta could return even hbase:meta isn't // assigned when master is shutting down - if(isStopped()) return; + if (isStopped()) return; status.setStatus("Submitting log splitting work for previously failed region servers"); // Master has recovered hbase:meta region server and we put @@ -720,8 +717,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } // Update meta with new PB serialization if required. i.e migrate all HRI to PB serialization - // in meta. This must happen before we assign all user regions or else the assignment will - // fail. + // in meta. This must happen before we assign all user regions or else the assignment will fail. 
if (this.conf.getBoolean("hbase.MetaMigrationConvertingToPB", true)) { MetaMigrationConvertingToPB.updateMetaIfNecessary(this); } @@ -730,11 +726,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server { status.setStatus("Starting assignment manager"); this.assignmentManager.joinCluster(); - //set cluster status again after user regions are assigned + // set cluster status again after user regions are assigned this.balancer.setClusterStatus(getClusterStatus()); - // Start balancer and meta catalog janitor after meta and regions have - // been assigned. + // Start balancer and meta catalog janitor after meta and regions have been assigned. status.setStatus("Starting balancer and catalog janitor"); this.clusterStatusChore = new ClusterStatusChore(this, balancer); getChoreService().scheduleChore(clusterStatusChore); @@ -745,7 +740,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { status.setStatus("Starting namespace manager"); initNamespace(); - + if (this.cpHost != null) { try { this.cpHost.preMasterInitialization(); @@ -757,8 +752,10 @@ public class HMaster extends HRegionServer implements MasterServices, Server { status.markComplete("Initialization successful"); LOG.info("Master has completed initialization"); configurationManager.registerObserver(this.balancer); + + // Set master as 'initialized'. initialized = true; - + status.setStatus("Starting quota manager"); initQuotaManager(); @@ -921,7 +918,7 @@ public class HMaster extends HRegionServer implements MasterServices, Server { // if the meta region server is died at this time, we need it to be re-assigned // by SSH so that system tables can be assigned. // No need to wait for meta is assigned = 0 when meta is just verified. - if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableServerShutdownHandler(assigned != 0); + if (replicaId == HRegionInfo.DEFAULT_REPLICA_ID) enableCrashedServerProcessing(assigned != 0); LOG.info("hbase:meta with replicaId " + replicaId + " assigned=" + assigned + ", rit=" + rit + ", location=" + metaTableLocator.getMetaRegionLocation(this.getZooKeeper(), replicaId)); status.setStatus("META assigned."); @@ -970,15 +967,14 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } } - private void enableServerShutdownHandler( - final boolean waitForMeta) throws IOException, InterruptedException { - // If ServerShutdownHandler is disabled, we enable it and expire those dead - // but not expired servers. This is required so that if meta is assigning to - // a server which dies after assignMeta starts assignment, - // SSH can re-assign it. Otherwise, we will be + private void enableCrashedServerProcessing(final boolean waitForMeta) + throws IOException, InterruptedException { + // If crashed server processing is disabled, we enable it and expire those dead but not expired + // servers. This is required so that if meta is assigning to a server which dies after + // assignMeta starts assignment, ServerCrashProcedure can re-assign it. Otherwise, we will be // stuck here waiting forever if waitForMeta is specified. 
- if (!serverShutdownHandlerEnabled) { - serverShutdownHandlerEnabled = true; + if (!serverCrashProcessingEnabled) { + serverCrashProcessingEnabled = true; this.serverManager.processQueuedDeadServers(); } @@ -2091,13 +2087,18 @@ public class HMaster extends HRegionServer implements MasterServices, Server { } /** - * ServerShutdownHandlerEnabled is set false before completing - * assignMeta to prevent processing of ServerShutdownHandler. + * ServerCrashProcessingEnabled is set false before completing assignMeta to prevent processing + * of crashed servers. * @return true if assignMeta has completed; */ @Override - public boolean isServerShutdownHandlerEnabled() { - return this.serverShutdownHandlerEnabled; + public boolean isServerCrashProcessingEnabled() { + return this.serverCrashProcessingEnabled; + } + + @VisibleForTesting + public void setServerCrashProcessingEnabled(final boolean b) { + this.serverCrashProcessingEnabled = b; } /** diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java index 1b76cd4..bcf9ba0 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterFileSystem.java @@ -58,6 +58,8 @@ import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.util.FSTableDescriptors; import org.apache.hadoop.hbase.util.FSUtils; +import com.google.common.annotations.VisibleForTesting; + /** * This class abstracts a bunch of operations the HMaster needs to interact with * the underlying file system, including splitting log files, checking file @@ -131,6 +133,11 @@ public class MasterFileSystem { this.distributedLogReplay = this.splitLogManager.isLogReplaying(); } + @VisibleForTesting + SplitLogManager getSplitLogManager() { + return this.splitLogManager; + } + /** * Create initial layout in filesystem. *
<ol>
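For illustration only (not part of the patch): a minimal sketch of how the new ServerCrashStateData message defined in MasterProcedure.proto above might be populated and round-tripped through the generated builder API, e.g. when a ServerCrashProcedure persists its state. It assumes the nested-class layout shown in the generated MasterProcedureProtos code; the host name, port, and start code are made-up values, and the repeated RegionInfo fields are omitted for brevity.

import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos;
import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashStateData;

public class ServerCrashStateDataExample {
  public static void main(String[] args) throws Exception {
    // Hypothetical crashed region server; host/port/startcode are made-up values.
    HBaseProtos.ServerName crashed = HBaseProtos.ServerName.newBuilder()
        .setHostName("rs-example.host")
        .setPort(16020)
        .setStartCode(1234567890L)
        .build();

    // Populate the state message using the generated builder methods shown above.
    ServerCrashStateData state = ServerCrashStateData.newBuilder()
        .setServerName(crashed)           // required field
        .setDistributedLogReplay(false)   // optional
        .setCarryingMeta(false)           // optional
        .setShouldSplitWal(true)          // proto default is also true
        .build();

    // Round-trip through the wire format, as a procedure would when saving/restoring state.
    byte[] bytes = state.toByteArray();
    ServerCrashStateData restored = ServerCrashStateData.parseFrom(bytes);
    System.out.println("shouldSplitWal=" + restored.getShouldSplitWal()
        + ", server=" + restored.getServerName().getHostName());
  }
}

The should_split_wal default of true mirrors the shouldSplitWal flag that ServerManager passes when it submits the new ServerCrashProcedure in place of ServerShutdownHandler.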
    diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java index 59a078e..dd64bc8 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/MasterServices.java @@ -177,7 +177,7 @@ public interface MasterServices extends Server { /** * @return true if master enables ServerShutdownHandler; */ - boolean isServerShutdownHandlerEnabled(); + boolean isServerCrashProcessingEnabled(); /** * Registers a new protocol buffer {@link Service} subclass as a master coprocessor endpoint. diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java index 78097ac..58a8260 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/RegionStates.java @@ -31,20 +31,19 @@ import java.util.TreeMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hbase.HConstants; import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.RegionTransition; import org.apache.hadoop.hbase.HTableDescriptor; import org.apache.hadoop.hbase.MetaTableAccessor; +import org.apache.hadoop.hbase.RegionTransition; import org.apache.hadoop.hbase.Server; import org.apache.hadoop.hbase.ServerLoad; import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableStateManager; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.client.RegionReplicaUtil; -import org.apache.hadoop.hbase.MetaTableAccessor; import org.apache.hadoop.hbase.master.RegionState.State; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; import org.apache.hadoop.hbase.util.Bytes; @@ -428,8 +427,7 @@ public class RegionStates { return updateRegionState(hri, state, serverName, HConstants.NO_SEQNUM); } - public void regionOnline( - final HRegionInfo hri, final ServerName serverName) { + public void regionOnline(final HRegionInfo hri, final ServerName serverName) { regionOnline(hri, serverName, HConstants.NO_SEQNUM); } @@ -438,16 +436,14 @@ public class RegionStates { * We can't confirm it is really online on specified region server * because it hasn't been put in region server's online region list yet. */ - public void regionOnline(final HRegionInfo hri, - final ServerName serverName, long openSeqNum) { + public void regionOnline(final HRegionInfo hri, final ServerName serverName, long openSeqNum) { String encodedName = hri.getEncodedName(); if (!serverManager.isServerOnline(serverName)) { // This is possible if the region server dies before master gets a // chance to handle ZK event in time. At this time, if the dead server // is already processed by SSH, we should ignore this event. // If not processed yet, ignore and let SSH deal with it. 
- LOG.warn("Ignored, " + encodedName - + " was opened on a dead server: " + serverName); + LOG.warn("Ignored, " + encodedName + " was opened on a dead server: " + serverName); return; } updateRegionState(hri, State.OPEN, serverName, openSeqNum); @@ -529,7 +525,7 @@ public class RegionStates { } long now = System.currentTimeMillis(); if (LOG.isDebugEnabled()) { - LOG.debug("Adding to processed servers " + serverName); + LOG.debug("Adding to log splitting servers " + serverName); } processedServers.put(serverName, Long.valueOf(now)); Configuration conf = server.getConfiguration(); @@ -543,7 +539,7 @@ public class RegionStates { Map.Entry e = it.next(); if (e.getValue().longValue() < cutoff) { if (LOG.isDebugEnabled()) { - LOG.debug("Removed from processed servers " + e.getKey()); + LOG.debug("Removed from log splitting servers " + e.getKey()); } it.remove(); } diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java index 5c8bd34..bdc7358 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/ServerManager.java @@ -53,8 +53,7 @@ import org.apache.hadoop.hbase.client.ConnectionFactory; import org.apache.hadoop.hbase.ipc.ServerNotRunningYetException; import org.apache.hadoop.hbase.client.RetriesExhaustedException; import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; -import org.apache.hadoop.hbase.master.handler.MetaServerShutdownHandler; -import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; +import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; import org.apache.hadoop.hbase.monitoring.MonitoredTask; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.RequestConverter; @@ -581,7 +580,7 @@ public class ServerManager { } return; } - if (!services.isServerShutdownHandlerEnabled()) { + if (!services.isServerCrashProcessingEnabled()) { LOG.info("Master doesn't enable ServerShutdownHandler during initialization, " + "delay expiring server " + serverName); this.queuedDeadServers.add(serverName); @@ -593,18 +592,8 @@ public class ServerManager { " but server shutdown already in progress"); return; } - synchronized (onlineServers) { - if (!this.onlineServers.containsKey(serverName)) { - LOG.warn("Expiration of " + serverName + " but server not online"); - } - // Remove the server from the known servers lists and update load info BUT - // add to deadservers first; do this so it'll show in dead servers list if - // not in online servers list. - this.deadservers.add(serverName); - this.onlineServers.remove(serverName); - onlineServers.notifyAll(); - } - this.rsAdmins.remove(serverName); + moveFromOnelineToDeadServers(serverName); + // If cluster is going down, yes, servers are going to be expiring; don't // process as a dead server if (this.clusterShutdown) { @@ -617,13 +606,8 @@ public class ServerManager { } boolean carryingMeta = services.getAssignmentManager().isCarryingMeta(serverName); - if (carryingMeta) { - this.services.getExecutorService().submit(new MetaServerShutdownHandler(this.master, - this.services, this.deadservers, serverName)); - } else { - this.services.getExecutorService().submit(new ServerShutdownHandler(this.master, - this.services, this.deadservers, serverName, true)); - } + this.services.getMasterProcedureExecutor(). 
+ submitProcedure(new ServerCrashProcedure(serverName, true, carryingMeta)); LOG.debug("Added=" + serverName + " to dead servers, submitted shutdown handler to be executed meta=" + carryingMeta); @@ -635,8 +619,20 @@ public class ServerManager { } } - public synchronized void processDeadServer(final ServerName serverName) { - this.processDeadServer(serverName, false); + @VisibleForTesting + public void moveFromOnelineToDeadServers(final ServerName sn) { + synchronized (onlineServers) { + if (!this.onlineServers.containsKey(sn)) { + LOG.warn("Expiration of " + sn + " but server not online"); + } + // Remove the server from the known servers lists and update load info BUT + // add to deadservers first; do this so it'll show in dead servers list if + // not in online servers list. + this.deadservers.add(sn); + this.onlineServers.remove(sn); + onlineServers.notifyAll(); + } + this.rsAdmins.remove(sn); } public synchronized void processDeadServer(final ServerName serverName, boolean shouldSplitWal) { @@ -654,9 +650,8 @@ public class ServerManager { } this.deadservers.add(serverName); - this.services.getExecutorService().submit( - new ServerShutdownHandler(this.master, this.services, this.deadservers, serverName, - shouldSplitWal)); + this.services.getMasterProcedureExecutor(). + submitProcedure(new ServerCrashProcedure(serverName, shouldSplitWal, false)); } /** @@ -664,7 +659,7 @@ public class ServerManager { * called after HMaster#assignMeta and AssignmentManager#joinCluster. * */ synchronized void processQueuedDeadServers() { - if (!services.isServerShutdownHandlerEnabled()) { + if (!services.isServerCrashProcessingEnabled()) { LOG.info("Master hasn't enabled ServerShutdownHandler"); } Iterator serverIterator = queuedDeadServers.iterator(); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java index a716369..3fc95cc 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/SplitLogManager.java @@ -405,16 +405,15 @@ public class SplitLogManager { // the function is only used in WALEdit direct replay mode return; } + if (serverNames == null || serverNames.isEmpty()) return; Set recoveredServerNameSet = new HashSet(); - if (serverNames != null) { - for (ServerName tmpServerName : serverNames) { - recoveredServerNameSet.add(tmpServerName.getServerName()); - } + for (ServerName tmpServerName : serverNames) { + recoveredServerNameSet.add(tmpServerName.getServerName()); } - + + this.recoveringRegionLock.lock(); try { - this.recoveringRegionLock.lock(); ((BaseCoordinatedStateManager) server.getCoordinatedStateManager()) .getSplitLogManagerCoordination().removeRecoveringRegions(recoveredServerNameSet, isMetaRecovery); diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java index cfaeb98..ef1e84f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/TableLockManager.java @@ -25,17 +25,16 @@ import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.conf.Configuration; -import org.apache.hadoop.hbase.TableName; import 
org.apache.hadoop.hbase.InterProcessLock; import org.apache.hadoop.hbase.InterProcessLock.MetadataHandler; import org.apache.hadoop.hbase.InterProcessReadWriteLock; import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; import org.apache.hadoop.hbase.exceptions.LockTimeoutException; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.hadoop.hbase.util.EnvironmentEdgeManager; import org.apache.hadoop.hbase.zookeeper.ZKUtil; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java deleted file mode 100644 index 18e564a..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/LogReplayHandler.java +++ /dev/null @@ -1,88 +0,0 @@ -/** - * Copyright The Apache Software Foundation - * - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with this - * work for additional information regarding copyright ownership. The ASF - * licenses this file to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT - * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the - * License for the specific language governing permissions and limitations - * under the License. - */ -package org.apache.hadoop.hbase.master.handler; - -import java.io.IOException; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.Server; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.executor.EventHandler; -import org.apache.hadoop.hbase.executor.EventType; -import org.apache.hadoop.hbase.master.DeadServer; -import org.apache.hadoop.hbase.master.MasterServices; - -/** - * Handle logReplay work from SSH. Having a separate handler is not to block SSH in re-assigning - * regions from dead servers. Otherwise, available SSH handlers could be blocked by logReplay work - * (from {@link org.apache.hadoop.hbase.master.MasterFileSystem#splitLog(ServerName)}). 
- * During logReplay, if a receiving RS(say A) fails again, regions on A won't be able to be - * assigned to another live RS which causes the log replay unable to complete because WAL edits - * replay depends on receiving RS to be live - */ -@InterfaceAudience.Private -public class LogReplayHandler extends EventHandler { - private static final Log LOG = LogFactory.getLog(LogReplayHandler.class); - private final ServerName serverName; - protected final Server master; - protected final MasterServices services; - protected final DeadServer deadServers; - - public LogReplayHandler(final Server server, final MasterServices services, - final DeadServer deadServers, final ServerName serverName) { - super(server, EventType.M_LOG_REPLAY); - this.master = server; - this.services = services; - this.deadServers = deadServers; - this.serverName = serverName; - this.deadServers.add(serverName); - } - - @Override - public String toString() { - String name = serverName.toString(); - return getClass().getSimpleName() + "-" + name + "-" + getSeqid(); - } - - @Override - public void process() throws IOException { - try { - if (this.master != null && this.master.isStopped()) { - // we're exiting ... - return; - } - this.services.getMasterFileSystem().splitLog(serverName); - } catch (Exception ex) { - if (ex instanceof IOException) { - // resubmit log replay work when failed - this.services.getExecutorService().submit((LogReplayHandler) this); - this.deadServers.add(serverName); - throw new IOException("failed log replay for " + serverName + ", will retry", ex); - } else { - throw new IOException(ex); - } - } finally { - this.deadServers.finish(serverName); - } - // logReplay is the last step of SSH so log a line to indicate that - LOG.info("Finished processing shutdown of " + serverName); - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java deleted file mode 100644 index 409ac5e..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/MetaServerShutdownHandler.java +++ /dev/null @@ -1,222 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. 
- */ -package org.apache.hadoop.hbase.master.handler; - -import java.io.IOException; -import java.io.InterruptedIOException; -import java.util.HashSet; -import java.util.Set; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.Server; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.executor.EventType; -import org.apache.hadoop.hbase.master.AssignmentManager; -import org.apache.hadoop.hbase.master.DeadServer; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; -import org.apache.hadoop.hbase.util.Threads; -import org.apache.zookeeper.KeeperException; - -import com.google.common.annotations.VisibleForTesting; - -import java.util.concurrent.atomic.AtomicInteger; - -/** - * Shutdown handler for the server hosting hbase:meta - */ -@InterfaceAudience.Private -public class MetaServerShutdownHandler extends ServerShutdownHandler { - private static final Log LOG = LogFactory.getLog(MetaServerShutdownHandler.class); - private AtomicInteger eventExceptionCount = new AtomicInteger(0); - @VisibleForTesting - static final int SHOW_STRACKTRACE_FREQUENCY = 100; - - public MetaServerShutdownHandler(final Server server, - final MasterServices services, - final DeadServer deadServers, final ServerName serverName) { - super(server, services, deadServers, serverName, - EventType.M_META_SERVER_SHUTDOWN, true); - } - - @Override - public void process() throws IOException { - boolean gotException = true; - try { - AssignmentManager am = this.services.getAssignmentManager(); - this.services.getMasterFileSystem().setLogRecoveryMode(); - boolean distributedLogReplay = - (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY); - try { - if (this.shouldSplitWal) { - LOG.info("Splitting hbase:meta logs for " + serverName); - if (distributedLogReplay) { - Set regions = new HashSet(); - regions.add(HRegionInfo.FIRST_META_REGIONINFO); - this.services.getMasterFileSystem().prepareLogReplay(serverName, regions); - } else { - this.services.getMasterFileSystem().splitMetaLog(serverName); - } - am.getRegionStates().logSplit(HRegionInfo.FIRST_META_REGIONINFO); - } - } catch (IOException ioe) { - this.services.getExecutorService().submit(this); - this.deadServers.add(serverName); - throw new IOException("failed log splitting for " + serverName + ", will retry", ioe); - } - - // Assign meta if we were carrying it. - // Check again: region may be assigned to other where because of RIT - // timeout - if (am.isCarryingMeta(serverName)) { - LOG.info("Server " + serverName + " was carrying META. Trying to assign."); - am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO); - verifyAndAssignMetaWithRetries(); - } else if (!server.getMetaTableLocator().isLocationAvailable(this.server.getZooKeeper())) { - // the meta location as per master is null. This could happen in case when meta assignment - // in previous run failed, while meta znode has been updated to null. We should try to - // assign the meta again. 
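The deleted handler code around here implements a verify-then-assign-with-retries loop for hbase:meta (verifyAndAssignMetaWithRetries further down: verify the meta location, re-assign if it is stale or null, sleep and retry on failure, abort after a configured number of attempts). A minimal, self-contained sketch of that retry pattern follows; the types and method names below are illustrative stand-ins, not HBase API:

    import java.util.function.BooleanSupplier;

    // Illustrative only: retry a verification step and fall back to an assign action,
    // mirroring the verifyAndAssignMetaWithRetries() loop in the removed handler.
    public class VerifyAssignRetry {
      public static void verifyThenAssign(BooleanSupplier verify, Runnable assign,
          int maxRetries, long waitMillis) throws InterruptedException {
        for (int attempt = 0; ; attempt++) {
          try {
            if (!verify.getAsBoolean()) {
              assign.run();           // location was stale or null; re-assign
            }
            return;                   // verified (or re-assigned) successfully
          } catch (RuntimeException e) {
            if (attempt >= maxRetries) {
              throw e;                // give up, as the handler aborts the master after N retries
            }
            Thread.sleep(waitMillis); // back off before the next verification attempt
          }
        }
      }
    }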
- verifyAndAssignMetaWithRetries(); - } else { - LOG.info("META has been assigned to otherwhere, skip assigning."); - } - - try { - if (this.shouldSplitWal && distributedLogReplay) { - if (!am.waitOnRegionToClearRegionsInTransition(HRegionInfo.FIRST_META_REGIONINFO, - regionAssignmentWaitTimeout)) { - // Wait here is to avoid log replay hits current dead server and incur a RPC timeout - // when replay happens before region assignment completes. - LOG.warn("Region " + HRegionInfo.FIRST_META_REGIONINFO.getEncodedName() - + " didn't complete assignment in time"); - } - this.services.getMasterFileSystem().splitMetaLog(serverName); - } - } catch (Exception ex) { - if (ex instanceof IOException) { - this.services.getExecutorService().submit(this); - this.deadServers.add(serverName); - throw new IOException("failed log splitting for " + serverName + ", will retry", ex); - } else { - throw new IOException(ex); - } - } - - gotException = false; - } finally { - if (gotException){ - // If we had an exception, this.deadServers.finish will be skipped in super.process() - this.deadServers.finish(serverName); - } - } - - super.process(); - // Clear this counter on successful handling. - this.eventExceptionCount.set(0); - } - - @Override - boolean isCarryingMeta() { - return true; - } - - /** - * Before assign the hbase:meta region, ensure it haven't - * been assigned by other place - *

    - * Under some scenarios, the hbase:meta region can be opened twice, so it seemed online - * in two regionserver at the same time. - * If the hbase:meta region has been assigned, so the operation can be canceled. - * @throws InterruptedException - * @throws IOException - * @throws KeeperException - */ - private void verifyAndAssignMeta() - throws InterruptedException, IOException, KeeperException { - long timeout = this.server.getConfiguration(). - getLong("hbase.catalog.verification.timeout", 1000); - if (!server.getMetaTableLocator().verifyMetaRegionLocation(server.getConnection(), - this.server.getZooKeeper(), timeout)) { - this.services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO); - } else if (serverName.equals(server.getMetaTableLocator().getMetaRegionLocation( - this.server.getZooKeeper()))) { - throw new IOException("hbase:meta is onlined on the dead server " - + serverName); - } else { - LOG.info("Skip assigning hbase:meta, because it is online on the " - + server.getMetaTableLocator().getMetaRegionLocation(this.server.getZooKeeper())); - } - } - - /** - * Failed many times, shutdown processing - * @throws IOException - */ - private void verifyAndAssignMetaWithRetries() throws IOException { - int iTimes = this.server.getConfiguration().getInt( - "hbase.catalog.verification.retries", 10); - - long waitTime = this.server.getConfiguration().getLong( - "hbase.catalog.verification.timeout", 1000); - - int iFlag = 0; - while (true) { - try { - verifyAndAssignMeta(); - break; - } catch (KeeperException e) { - this.server.abort("In server shutdown processing, assigning meta", e); - throw new IOException("Aborting", e); - } catch (Exception e) { - if (iFlag >= iTimes) { - this.server.abort("verifyAndAssignMeta failed after" + iTimes - + " times retries, aborting", e); - throw new IOException("Aborting", e); - } - try { - Thread.sleep(waitTime); - } catch (InterruptedException e1) { - LOG.warn("Interrupted when is the thread sleep", e1); - Thread.currentThread().interrupt(); - throw (InterruptedIOException)new InterruptedIOException().initCause(e1); - } - iFlag++; - } - } - } - - @Override - protected void handleException(Throwable t) { - int count = eventExceptionCount.getAndIncrement(); - if (count < 0) count = eventExceptionCount.getAndSet(0); - if (count > SHOW_STRACKTRACE_FREQUENCY) { // Too frequent, let's slow reporting - Threads.sleep(1000); - } - if (count % SHOW_STRACKTRACE_FREQUENCY == 0) { - LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount, t); - } else { - LOG.error("Caught " + eventType + ", count=" + this.eventExceptionCount + - "; " + t.getMessage() + "; stack trace shows every " + SHOW_STRACKTRACE_FREQUENCY + - "th time."); - } - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java deleted file mode 100644 index 7789ee1..0000000 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/handler/ServerShutdownHandler.java +++ /dev/null @@ -1,399 +0,0 @@ -/** - * - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. 
You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ -package org.apache.hadoop.hbase.master.handler; - -import java.io.IOException; -import java.io.InterruptedIOException; -import java.util.ArrayList; -import java.util.List; -import java.util.Set; -import java.util.concurrent.locks.Lock; - -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hbase.HConstants; -import org.apache.hadoop.hbase.HRegionInfo; -import org.apache.hadoop.hbase.MetaTableAccessor; -import org.apache.hadoop.hbase.Server; -import org.apache.hadoop.hbase.ServerName; -import org.apache.hadoop.hbase.classification.InterfaceAudience; -import org.apache.hadoop.hbase.client.RegionReplicaUtil; -import org.apache.hadoop.hbase.executor.EventHandler; -import org.apache.hadoop.hbase.executor.EventType; -import org.apache.hadoop.hbase.master.AssignmentManager; -import org.apache.hadoop.hbase.master.DeadServer; -import org.apache.hadoop.hbase.master.MasterFileSystem; -import org.apache.hadoop.hbase.master.MasterServices; -import org.apache.hadoop.hbase.master.RegionState; -import org.apache.hadoop.hbase.master.RegionState.State; -import org.apache.hadoop.hbase.master.RegionStates; -import org.apache.hadoop.hbase.master.ServerManager; -import org.apache.hadoop.hbase.master.balancer.BaseLoadBalancer; -import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; -import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; -import org.apache.hadoop.hbase.util.ConfigUtil; -import org.apache.hadoop.hbase.zookeeper.ZKAssign; -import org.apache.zookeeper.KeeperException; - -/** - * Process server shutdown. - * Server-to-handle must be already in the deadservers lists. 
See - * {@link ServerManager#expireServer(ServerName)} - */ -@InterfaceAudience.Private -public class ServerShutdownHandler extends EventHandler { - private static final Log LOG = LogFactory.getLog(ServerShutdownHandler.class); - protected final ServerName serverName; - protected final MasterServices services; - protected final DeadServer deadServers; - protected final boolean shouldSplitWal; // whether to split WAL or not - protected final int regionAssignmentWaitTimeout; - - public ServerShutdownHandler(final Server server, final MasterServices services, - final DeadServer deadServers, final ServerName serverName, - final boolean shouldSplitWal) { - this(server, services, deadServers, serverName, EventType.M_SERVER_SHUTDOWN, - shouldSplitWal); - } - - ServerShutdownHandler(final Server server, final MasterServices services, - final DeadServer deadServers, final ServerName serverName, EventType type, - final boolean shouldSplitWal) { - super(server, type); - this.serverName = serverName; - this.server = server; - this.services = services; - this.deadServers = deadServers; - if (!this.deadServers.isDeadServer(this.serverName)) { - LOG.warn(this.serverName + " is NOT in deadservers; it should be!"); - } - this.shouldSplitWal = shouldSplitWal; - this.regionAssignmentWaitTimeout = server.getConfiguration().getInt( - HConstants.LOG_REPLAY_WAIT_REGION_TIMEOUT, 15000); - } - - @Override - public String getInformativeName() { - if (serverName != null) { - return this.getClass().getSimpleName() + " for " + serverName; - } else { - return super.getInformativeName(); - } - } - - /** - * @return True if the server we are processing was carrying hbase:meta - */ - boolean isCarryingMeta() { - return false; - } - - @Override - public String toString() { - return getClass().getSimpleName() + "-" + serverName + "-" + getSeqid(); - } - - @Override - public void process() throws IOException { - boolean hasLogReplayWork = false; - final ServerName serverName = this.serverName; - try { - - // We don't want worker thread in the MetaServerShutdownHandler - // executor pool to block by waiting availability of hbase:meta - // Otherwise, it could run into the following issue: - // 1. The current MetaServerShutdownHandler instance For RS1 waits for the hbase:meta - // to come online. - // 2. The newly assigned hbase:meta region server RS2 was shutdown right after - // it opens the hbase:meta region. So the MetaServerShutdownHandler - // instance For RS1 will still be blocked. - // 3. The new instance of MetaServerShutdownHandler for RS2 is queued. - // 4. The newly assigned hbase:meta region server RS3 was shutdown right after - // it opens the hbase:meta region. So the MetaServerShutdownHandler - // instance For RS1 and RS2 will still be blocked. - // 5. The new instance of MetaServerShutdownHandler for RS3 is queued. - // 6. Repeat until we run out of MetaServerShutdownHandler worker threads - // The solution here is to resubmit a ServerShutdownHandler request to process - // user regions on that server so that MetaServerShutdownHandler - // executor pool is always available. - // - // If AssignmentManager hasn't finished rebuilding user regions, - // we are not ready to assign dead regions either. So we re-queue up - // the dead server for further processing too. 
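The comment block just above explains how MetaServerShutdownHandler instances could pile up waiting for hbase:meta and exhaust the executor's worker threads, forcing the resubmission workaround. The procedure-based replacement sidesteps this by yielding a step back to the scheduler instead of blocking a worker (the new MasterProcedureQueue comments note that ServerCrashProcedure yields while meta is unassigned, and yield() now re-queues at the back). A minimal, self-contained sketch of that "yield instead of block" step; the types here are simplified stand-ins, not the actual procedure-v2 classes:

    // Illustrative only: a step that yields back to its scheduler instead of
    // blocking a worker thread while a precondition (e.g. meta being online) is unmet.
    public class YieldingStep {
      /** Thrown to tell the executor to re-queue this step rather than block on it. */
      public static class YieldException extends Exception {}

      public interface MetaLocation { boolean isOnline(); }

      // Returns true when the step completed; throws YieldException to be rescheduled.
      public static boolean runStep(MetaLocation meta, Runnable work) throws YieldException {
        if (!meta.isOnline()) {
          throw new YieldException();   // free the worker; the scheduler retries later
        }
        work.run();                     // precondition met, do the real work
        return true;
      }
    }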
- AssignmentManager am = services.getAssignmentManager(); - ServerManager serverManager = services.getServerManager(); - if (isCarryingMeta() /* hbase:meta */ || !am.isFailoverCleanupDone()) { - serverManager.processDeadServer(serverName, this.shouldSplitWal); - return; - } - - // Wait on meta to come online; we need it to progress. - // TODO: Best way to hold strictly here? We should build this retry logic - // into the MetaTableAccessor operations themselves. - // TODO: Is the reading of hbase:meta necessary when the Master has state of - // cluster in its head? It should be possible to do without reading hbase:meta - // in all but one case. On split, the RS updates the hbase:meta - // table and THEN informs the master of the split via zk nodes in - // 'unassigned' dir. Currently the RS puts ephemeral nodes into zk so if - // the regionserver dies, these nodes do not stick around and this server - // shutdown processing does fixup (see the fixupDaughters method below). - // If we wanted to skip the hbase:meta scan, we'd have to change at least the - // final SPLIT message to be permanent in zk so in here we'd know a SPLIT - // completed (zk is updated after edits to hbase:meta have gone in). See - // {@link SplitTransaction}. We'd also have to be figure another way for - // doing the below hbase:meta daughters fixup. - Set hris = null; - while (!this.server.isStopped()) { - try { - server.getMetaTableLocator().waitMetaRegionLocation(server.getZooKeeper()); - if (BaseLoadBalancer.tablesOnMaster(server.getConfiguration())) { - while (!this.server.isStopped() && serverManager.countOfRegionServers() < 2) { - // Wait till at least another regionserver is up besides the active master - // so that we don't assign all regions to the active master. - // This is best of efforts, because newly joined regionserver - // could crash right after that. - Thread.sleep(100); - } - } - // Skip getting user regions if the server is stopped. - if (!this.server.isStopped()) { - if (ConfigUtil.useZKForAssignment(server.getConfiguration())) { - hris = MetaTableAccessor.getServerUserRegions(this.server.getConnection(), - this.serverName).keySet(); - } else { - // Not using ZK for assignment, regionStates has everything we want - hris = am.getRegionStates().getServerRegions(serverName); - } - } - break; - } catch (InterruptedException e) { - Thread.currentThread().interrupt(); - throw (InterruptedIOException)new InterruptedIOException().initCause(e); - } catch (IOException ioe) { - LOG.info("Received exception accessing hbase:meta during server shutdown of " + - serverName + ", retrying hbase:meta read", ioe); - } - } - if (this.server.isStopped()) { - throw new IOException("Server is stopped"); - } - - // delayed to set recovery mode based on configuration only after all outstanding splitlogtask - // drained - this.services.getMasterFileSystem().setLogRecoveryMode(); - boolean distributedLogReplay = - (this.services.getMasterFileSystem().getLogRecoveryMode() == RecoveryMode.LOG_REPLAY); - - try { - if (this.shouldSplitWal) { - if (distributedLogReplay) { - LOG.info("Mark regions in recovery for crashed server " + serverName + - " before assignment; regions=" + hris); - MasterFileSystem mfs = this.services.getMasterFileSystem(); - mfs.prepareLogReplay(serverName, hris); - } else { - LOG.info("Splitting logs for " + serverName + - " before assignment; region count=" + (hris == null ? 
0 : hris.size())); - this.services.getMasterFileSystem().splitLog(serverName); - } - am.getRegionStates().logSplit(serverName); - } else { - LOG.info("Skipping log splitting for " + serverName); - } - } catch (IOException ioe) { - resubmit(serverName, ioe); - } - List toAssignRegions = new ArrayList(); - int replicaCount = services.getConfiguration().getInt(HConstants.META_REPLICAS_NUM, - HConstants.DEFAULT_META_REPLICA_NUM); - for (int i = 1; i < replicaCount; i++) { - HRegionInfo metaHri = - RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i); - if (am.isCarryingMetaReplica(serverName, metaHri)) { - LOG.info("Reassigning meta replica" + metaHri + " that was on " + serverName); - toAssignRegions.add(metaHri); - } - } - // Clean out anything in regions in transition. Being conservative and - // doing after log splitting. Could do some states before -- OPENING? - // OFFLINE? -- and then others after like CLOSING that depend on log - // splitting. - List regionsInTransition = am.processServerShutdown(serverName); - LOG.info("Reassigning " + ((hris == null)? 0: hris.size()) + - " region(s) that " + (serverName == null? "null": serverName) + - " was carrying (and " + regionsInTransition.size() + - " regions(s) that were opening on this server)"); - - toAssignRegions.addAll(regionsInTransition); - - // Iterate regions that were on this server and assign them - if (hris != null && !hris.isEmpty()) { - RegionStates regionStates = am.getRegionStates(); - for (HRegionInfo hri: hris) { - if (regionsInTransition.contains(hri)) { - continue; - } - String encodedName = hri.getEncodedName(); - Lock lock = am.acquireRegionLock(encodedName); - try { - RegionState rit = regionStates.getRegionTransitionState(hri); - if (processDeadRegion(hri, am)) { - ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri); - if (addressFromAM != null && !addressFromAM.equals(this.serverName)) { - // If this region is in transition on the dead server, it must be - // opening or pending_open, which should have been covered by AM#processServerShutdown - LOG.info("Skip assigning region " + hri.getRegionNameAsString() - + " because it has been opened in " + addressFromAM.getServerName()); - continue; - } - if (rit != null) { - if (rit.getServerName() != null && !rit.isOnServer(serverName)) { - // Skip regions that are in transition on other server - LOG.info("Skip assigning region in transition on other server" + rit); - continue; - } - try{ - //clean zk node - LOG.info("Reassigning region with rs = " + rit + " and deleting zk node if exists"); - ZKAssign.deleteNodeFailSilent(services.getZooKeeper(), hri); - regionStates.updateRegionState(hri, State.OFFLINE); - } catch (KeeperException ke) { - this.server.abort("Unexpected ZK exception deleting unassigned node " + hri, ke); - return; - } - } else if (regionStates.isRegionInState( - hri, State.SPLITTING_NEW, State.MERGING_NEW)) { - regionStates.updateRegionState(hri, State.OFFLINE); - } - toAssignRegions.add(hri); - } else if (rit != null) { - if ((rit.isPendingCloseOrClosing() || rit.isOffline()) - && am.getTableStateManager().isTableState(hri.getTable(), - ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) || - am.getReplicasToClose().contains(hri)) { - // If the table was partially disabled and the RS went down, we should clear the RIT - // and remove the node for the region. 
- // The rit that we use may be stale in case the table was in DISABLING state - // but though we did assign we will not be clearing the znode in CLOSING state. - // Doing this will have no harm. See HBASE-5927 - regionStates.updateRegionState(hri, State.OFFLINE); - am.deleteClosingOrClosedNode(hri, rit.getServerName()); - am.offlineDisabledRegion(hri); - } else { - LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition " - + rit + " not to be assigned by SSH of server " + serverName); - } - } - } finally { - lock.unlock(); - } - } - } - - try { - am.assign(toAssignRegions); - } catch (InterruptedException ie) { - LOG.error("Caught " + ie + " during round-robin assignment"); - throw (InterruptedIOException)new InterruptedIOException().initCause(ie); - } catch (IOException ioe) { - LOG.info("Caught " + ioe + " during region assignment, will retry"); - // Only do wal splitting if shouldSplitWal and in DLR mode - serverManager.processDeadServer(serverName, - this.shouldSplitWal && distributedLogReplay); - return; - } - - if (this.shouldSplitWal && distributedLogReplay) { - // wait for region assignment completes - for (HRegionInfo hri : toAssignRegions) { - try { - if (!am.waitOnRegionToClearRegionsInTransition(hri, regionAssignmentWaitTimeout)) { - // Wait here is to avoid log replay hits current dead server and incur a RPC timeout - // when replay happens before region assignment completes. - LOG.warn("Region " + hri.getEncodedName() - + " didn't complete assignment in time"); - } - } catch (InterruptedException ie) { - throw new InterruptedIOException("Caught " + ie - + " during waitOnRegionToClearRegionsInTransition"); - } - } - // submit logReplay work - this.services.getExecutorService().submit( - new LogReplayHandler(this.server, this.services, this.deadServers, this.serverName)); - hasLogReplayWork = true; - } - } finally { - this.deadServers.finish(serverName); - } - - if (!hasLogReplayWork) { - LOG.info("Finished processing of shutdown of " + serverName); - } - } - - private void resubmit(final ServerName serverName, IOException ex) throws IOException { - // typecast to SSH so that we make sure that it is the SSH instance that - // gets submitted as opposed to MSSH or some other derived instance of SSH - this.services.getExecutorService().submit((ServerShutdownHandler) this); - this.deadServers.add(serverName); - throw new IOException("failed log splitting for " + serverName + ", will retry", ex); - } - - /** - * Process a dead region from a dead RS. Checks if the region is disabled or - * disabling or if the region has a partially completed split. - * @param hri - * @param assignmentManager - * @return Returns true if specified region should be assigned, false if not. - * @throws IOException - */ - public static boolean processDeadRegion(HRegionInfo hri, - AssignmentManager assignmentManager) - throws IOException { - boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable()); - if (!tablePresent) { - LOG.info("The table " + hri.getTable() - + " was deleted. Hence not proceeding."); - return false; - } - // If table is not disabled but the region is offlined, - boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(), - ZooKeeperProtos.Table.State.DISABLED); - if (disabled){ - LOG.info("The table " + hri.getTable() - + " was disabled. 
Hence not proceeding."); - return false; - } - if (hri.isOffline() && hri.isSplit()) { - //HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic operation. - //If the meta scanner saw the parent split, then it should see the daughters as assigned - //to the dead server. We don't have to do anything. - return false; - } - boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(), - ZooKeeperProtos.Table.State.DISABLING); - if (disabling) { - LOG.info("The table " + hri.getTable() - + " is disabled. Hence not assigning region" + hri.getEncodedName()); - return false; - } - return true; - } -} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java index 6c80dd2..a175d4e 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/AddColumnFamilyProcedure.java @@ -184,14 +184,14 @@ public class AddColumnFamilyProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite( + return env.getProcedureQueue().tryAcquireTableExclusiveLock( tableName, EventType.C_M_ADD_FAMILY.toString()); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(tableName); + env.getProcedureQueue().releaseTableExclusiveLock(tableName); } @Override @@ -404,4 +404,4 @@ public class AddColumnFamilyProcedure } return regionInfoList; } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java index 360637f..28b8550 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/CreateTableProcedure.java @@ -269,12 +269,12 @@ public class CreateTableProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { - return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "create table"); + return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "create table"); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(getTableName()); + env.getProcedureQueue().releaseTableExclusiveLock(getTableName()); } private boolean prepareCreate(final MasterProcedureEnv env) throws IOException { @@ -460,4 +460,4 @@ public class CreateTableProcedure final TableName tableName) throws IOException { env.getMasterServices().getTableDescriptors().get(tableName); } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java index 316f225..b1631d3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteColumnFamilyProcedure.java @@ -200,14 +200,14 @@ public class DeleteColumnFamilyProcedure @Override protected boolean 
acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite( + return env.getProcedureQueue().tryAcquireTableExclusiveLock( tableName, EventType.C_M_DELETE_FAMILY.toString()); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(tableName); + env.getProcedureQueue().releaseTableExclusiveLock(tableName); } @Override @@ -436,4 +436,4 @@ public class DeleteColumnFamilyProcedure } return regionInfoList; } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java index 2ba7b42..00812ab 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DeleteTableProcedure.java @@ -200,12 +200,12 @@ public class DeleteTableProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "delete table"); + return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "delete table"); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(getTableName()); + env.getProcedureQueue().releaseTableExclusiveLock(getTableName()); } @Override @@ -407,4 +407,4 @@ public class DeleteTableProcedure throws IOException { ProcedureSyncWait.getMasterQuotaManager(env).removeTableFromNamespaceQuota(tableName); } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java index bd8f29e..e34af89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/DisableTableProcedure.java @@ -214,14 +214,14 @@ public class DisableTableProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite( + return env.getProcedureQueue().tryAcquireTableExclusiveLock( tableName, EventType.C_M_DISABLE_TABLE.toString()); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(tableName); + env.getProcedureQueue().releaseTableExclusiveLock(tableName); } @Override @@ -559,4 +559,4 @@ public class DisableTableProcedure return regions != null && regions.isEmpty(); } } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java index 989e81a..37dd271 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/EnableTableProcedure.java @@ -94,9 +94,9 @@ public class EnableTableProcedure /** * Constructor * @param env MasterProcedureEnv - * @throws IOException * @param tableName the table to operate on * @param skipTableStateCheck whether to check table state + * 
@throws IOException */ public EnableTableProcedure( final MasterProcedureEnv env, @@ -238,14 +238,14 @@ public class EnableTableProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite( + return env.getProcedureQueue().tryAcquireTableExclusiveLock( tableName, EventType.C_M_ENABLE_TABLE.toString()); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(tableName); + env.getProcedureQueue().releaseTableExclusiveLock(tableName); } @Override diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java index 0a33cd4..f2f4bf3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureEnv.java @@ -120,4 +120,4 @@ public class MasterProcedureEnv { public boolean isInitialized() { return master.isInitialized(); } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureQueue.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureQueue.java index 0dd0c3d..af9eecf 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureQueue.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureQueue.java @@ -27,6 +27,7 @@ import java.util.concurrent.locks.ReentrantLock; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.TableExistsException; import org.apache.hadoop.hbase.TableNotFoundException; @@ -43,11 +44,12 @@ import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface.TableOpe * ProcedureRunnableSet for the Master Procedures. * This RunnableSet tries to provide to the ProcedureExecutor procedures * that can be executed without having to wait on a lock. - * Most of the master operations can be executed concurrently, if the they + * Most of the master operations can be executed concurrently, if they * are operating on different tables (e.g. two create table can be performed - * at the same, time assuming table A and table B). + * at the same, time assuming table A and table B) or against two different servers; say + * two servers that crashed at about the same time. * - * Each procedure should implement an interface providing information for this queue. + *

    Each procedure should implement an interface providing information for this queue. * for example table related procedures should implement TableProcedureInterface. * each procedure will be pushed in its own queue, and based on the operation type * we may take smarter decision. e.g. we can abort all the operations preceding @@ -58,7 +60,18 @@ import org.apache.hadoop.hbase.master.procedure.TableProcedureInterface.TableOpe public class MasterProcedureQueue implements ProcedureRunnableSet { private static final Log LOG = LogFactory.getLog(MasterProcedureQueue.class); - private final ProcedureFairRunQueues fairq; + // Two queues to ensure that server procedures run ahead of table procedures always. + private final ProcedureFairRunQueues tableFairQ; /** + * Rely on basic fair q. ServerCrashProcedure will yield if meta is not assigned. This way, the + * server that was carrying meta should rise to the top of the queue (this is how it used to + * work when we had handlers and ServerShutdownHandler ran). TODO: special handling of servers + * that were carrying system tables on crash; do I need to have these servers have priority? + * + *

    Apart from the special-casing of meta and system tables, fairq is what we want + */ + private final ProcedureFairRunQueues serverFairQ; + private final ReentrantLock lock = new ReentrantLock(); private final Condition waitCond = lock.newCondition(); private final TableLockManager lockManager; @@ -66,11 +79,16 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { private final int metaTablePriority; private final int userTablePriority; private final int sysTablePriority; + private static final int DEFAULT_SERVER_PRIORITY = 1; + /** + * Keeps count across server and table queues. + */ private int queueSize; public MasterProcedureQueue(final Configuration conf, final TableLockManager lockManager) { - this.fairq = new ProcedureFairRunQueues(1); + this.tableFairQ = new ProcedureFairRunQueues(1); + this.serverFairQ = new ProcedureFairRunQueues(1); this.lockManager = lockManager; // TODO: should this be part of the HTD? @@ -105,12 +123,13 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { @Override public void yield(final Procedure proc) { - addFront(proc); + addBack(proc); } @Override @edu.umd.cs.findbugs.annotations.SuppressWarnings("WA_AWAIT_NOT_IN_LOOP") public Long poll() { + Long pollResult = null; lock.lock(); try { if (queueSize == 0) { @@ -119,19 +138,25 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { return null; } } - - RunQueue queue = fairq.poll(); - if (queue != null && queue.isAvailable()) { - queueSize--; - return queue.poll(); + // For now, let server handling have precedence over table handling; presumption is that it + // is more important handling crashed servers than it is running the + // enabling/disabling tables, etc. + pollResult = doPoll(serverFairQ.poll()); + if (pollResult == null) { + pollResult = doPoll(tableFairQ.poll()); } } catch (InterruptedException e) { Thread.currentThread().interrupt(); - return null; } finally { lock.unlock(); } - return null; + return pollResult; + } + + private Long doPoll(final RunQueue rq) { + if (rq == null || !rq.isAvailable()) return null; + this.queueSize--; + return rq.poll(); } @Override @@ -148,7 +173,8 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { public void clear() { lock.lock(); try { - fairq.clear(); + serverFairQ.clear(); + tableFairQ.clear(); queueSize = 0; } finally { lock.unlock(); @@ -169,7 +195,8 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { public String toString() { lock.lock(); try { - return "MasterProcedureQueue size=" + queueSize + ": " + fairq; + return "MasterProcedureQueue size=" + queueSize + ": tableFairQ: " + tableFairQ + + ", serverFairQ: " + serverFairQ; } finally { lock.unlock(); } @@ -197,6 +224,7 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { markTableAsDeleted(iProcTable.getTableName()); } } + // No cleanup for ServerProcedureInterface types, yet. } private RunQueue getRunQueueOrCreate(final Procedure proc) { @@ -204,17 +232,26 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { final TableName table = ((TableProcedureInterface)proc).getTableName(); return getRunQueueOrCreate(table); } - // TODO: at the moment we only have Table procedures - // if you are implementing a non-table procedure, you have two option create - // a group for all the non-table procedures or try to find a key for your - // non-table procedure and implement something similar to the TableRunQueue. 
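The comments added above describe two ProcedureFairRunQueues, with the server queue polled ahead of the table queue so crashed-server handling always takes precedence over table DDL (see the new poll() body below, which tries serverFairQ before tableFairQ). A compact, self-contained sketch of that polling order; the class and field names are stand-ins, not the actual MasterProcedureQueue:

    import java.util.ArrayDeque;
    import java.util.Deque;

    // Illustrative only: poll a "server" queue before a "table" queue so crashed-server
    // work is always dispatched ahead of table DDL work.
    public class TwoQueuePoll {
      private final Deque<Long> serverQ = new ArrayDeque<>();
      private final Deque<Long> tableQ = new ArrayDeque<>();

      public synchronized void addServerProc(long procId) { serverQ.addLast(procId); }
      public synchronized void addTableProc(long procId)  { tableQ.addLast(procId); }

      /** @return next runnable procedure id, preferring server procedures; null if both are empty. */
      public synchronized Long poll() {
        Long id = serverQ.pollFirst();
        return id != null ? id : tableQ.pollFirst();
      }
    }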
+ if (proc instanceof ServerProcedureInterface) { + return getRunQueueOrCreate((ServerProcedureInterface)proc); + } + // TODO: at the moment we only have Table and Server procedures + // if you are implementing a non-table/non-server procedure, you have two options: create + // a group for all the non-table/non-server procedures or try to find a key for your + // non-table/non-server procedures and implement something similar to the TableRunQueue. throw new UnsupportedOperationException("RQs for non-table procedures are not implemented yet"); } private TableRunQueue getRunQueueOrCreate(final TableName table) { final TableRunQueue queue = getRunQueue(table); if (queue != null) return queue; - return (TableRunQueue)fairq.add(table, createTableRunQueue(table)); + return (TableRunQueue)tableFairQ.add(table, createTableRunQueue(table)); + } + + private ServerRunQueue getRunQueueOrCreate(final ServerProcedureInterface spi) { + final ServerRunQueue queue = getRunQueue(spi.getServerName()); + if (queue != null) return queue; + return (ServerRunQueue)serverFairQ.add(spi.getServerName(), createServerRunQueue(spi)); } private TableRunQueue createTableRunQueue(final TableName table) { @@ -227,8 +264,35 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { return new TableRunQueue(priority); } + private ServerRunQueue createServerRunQueue(final ServerProcedureInterface spi) { + return new ServerRunQueue(DEFAULT_SERVER_PRIORITY); + } + private TableRunQueue getRunQueue(final TableName table) { - return (TableRunQueue)fairq.get(table); + return (TableRunQueue)tableFairQ.get(table); + } + + private ServerRunQueue getRunQueue(final ServerName sn) { + return (ServerRunQueue)serverFairQ.get(sn); + } + + /** + * Try to acquire the write lock on the specified table. + * other operations in the table-queue will be executed after the lock is released. + * @param table Table to lock + * @param purpose Human readable reason for locking the table + * @return true if we were able to acquire the lock on the table, otherwise false. + */ + public boolean tryAcquireTableExclusiveLock(final TableName table, final String purpose) { + return getRunQueueOrCreate(table).tryExclusiveLock(lockManager, table, purpose); + } + + /** + * Release the write lock taken with tryAcquireTableWrite() + * @param table the name of the table that has the write lock + */ + public void releaseTableExclusiveLock(final TableName table) { + getRunQueue(table).releaseExclusiveLock(lockManager, table); } /** @@ -239,35 +303,54 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { * @param purpose Human readable reason for locking the table * @return true if we were able to acquire the lock on the table, otherwise false. */ - public boolean tryAcquireTableRead(final TableName table, final String purpose) { - return getRunQueueOrCreate(table).tryRead(lockManager, table, purpose); + public boolean tryAcquireTableSharedLock(final TableName table, final String purpose) { + return getRunQueueOrCreate(table).trySharedLock(lockManager, table, purpose); } /** * Release the read lock taken with tryAcquireTableRead() * @param table the name of the table that has the read lock */ - public void releaseTableRead(final TableName table) { - getRunQueue(table).releaseRead(lockManager, table); + public void releaseTableSharedLock(final TableName table) { + getRunQueue(table).releaseSharedLock(lockManager, table); } /** - * Try to acquire the write lock on the specified table. 
- * other operations in the table-queue will be executed after the lock is released. - * @param table Table to lock - * @param purpose Human readable reason for locking the table - * @return true if we were able to acquire the lock on the table, otherwise false. + * Try to acquire the write lock on the specified server. + * @see #releaseServerExclusiveLock(ServerProcedureInterface) + * @param spi Server to lock + * @return true if we were able to acquire the lock on the server, otherwise false. */ - public boolean tryAcquireTableWrite(final TableName table, final String purpose) { - return getRunQueueOrCreate(table).tryWrite(lockManager, table, purpose); + public boolean tryAcquireServerExclusiveLock(final ServerProcedureInterface spi) { + return getRunQueueOrCreate(spi).tryExclusiveLock(); } /** - * Release the write lock taken with tryAcquireTableWrite() - * @param table the name of the table that has the write lock + * Release the write lock + * @see #tryAcquireServerExclusiveLock(ServerProcedureInterface) + * @param spi the server that has the write lock */ - public void releaseTableWrite(final TableName table) { - getRunQueue(table).releaseWrite(lockManager, table); + public void releaseServerExclusiveLock(final ServerProcedureInterface spi) { + getRunQueue(spi.getServerName()).releaseExclusiveLock(); + } + + /** + * Try to acquire the read lock on the specified server. + * @see #releaseServerSharedLock(ServerProcedureInterface) + * @param spi Server to lock + * @return true if we were able to acquire the lock on the server, otherwise false. + */ + public boolean tryAcquireServerSharedLock(final ServerProcedureInterface spi) { + return getRunQueueOrCreate(spi).trySharedLock(); + } + + /** + * Release the read lock taken + * @see #tryAcquireServerSharedLock(ServerProcedureInterface) + * @param spi the server that has the read lock + */ + public void releaseServerSharedLock(final ServerProcedureInterface spi) { + getRunQueue(spi.getServerName()).releaseSharedLock(); } /** @@ -284,7 +367,7 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { lock.lock(); try { if (queue.isEmpty() && !queue.isLocked()) { - fairq.remove(table); + tableFairQ.remove(table); // Remove the table lock try { @@ -311,114 +394,167 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { } /** - * Run Queue for a Table. It contains a read-write lock that is used by the - * MasterProcedureQueue to decide if we should fetch an item from this queue - * or skip to another one which will be able to run without waiting for locks. + * Base abstract class for RunQueue implementations. + * Be careful honoring synchronizations in subclasses. In here we protect access but if you are + * acting on a state found in here, be sure dependent code keeps synchronization. + * Implements basic in-memory read/write locking mechanism to prevent procedure steps being run + * in parallel. */ - private static class TableRunQueue implements RunQueue { + private static abstract class AbstractRunQueue implements RunQueue { + // All modification of runnables happens with #lock held. 
private final Deque runnables = new ArrayDeque(); private final int priority; + private boolean exclusiveLock = false; + private int sharedLock = 0; - private TableLock tableLock = null; - private boolean wlock = false; - private int rlock = 0; - - public TableRunQueue(int priority) { + public AbstractRunQueue(int priority) { this.priority = priority; } + boolean isEmpty() { + return this.runnables.isEmpty(); + } + @Override - public void addFront(final Procedure proc) { - runnables.addFirst(proc.getProcId()); + public boolean isAvailable() { + synchronized (this) { + return !exclusiveLock && !runnables.isEmpty(); + } } - // TODO: Improve run-queue push with TableProcedureInterface.getType() - // we can take smart decisions based on the type of the operation (e.g. create/delete) @Override - public void addBack(final Procedure proc) { - runnables.addLast(proc.getProcId()); + public int getPriority() { + return this.priority; + } + + @Override + public void addFront(Procedure proc) { + this.runnables.addFirst(proc.getProcId()); + } + + @Override + public void addBack(Procedure proc) { + this.runnables.addLast(proc.getProcId()); } @Override public Long poll() { - return runnables.poll(); + return this.runnables.poll(); } @Override - public boolean isAvailable() { - synchronized (this) { - return !wlock && !runnables.isEmpty(); - } + public synchronized boolean isLocked() { + return isExclusiveLock() || sharedLock > 0; + } + + public synchronized boolean isExclusiveLock() { + return this.exclusiveLock; + } + + public synchronized boolean trySharedLock() { + if (isExclusiveLock()) return false; + sharedLock++; + return true; + } + + public synchronized void releaseSharedLock() { + sharedLock--; } - public boolean isEmpty() { - return runnables.isEmpty(); + /** + * @return True if only one instance of a shared lock outstanding. + */ + synchronized boolean isSingleSharedLock() { + return sharedLock == 1; } + public synchronized boolean tryExclusiveLock() { + if (isLocked()) return false; + exclusiveLock = true; + return true; + } + + public synchronized void releaseExclusiveLock() { + exclusiveLock = false; + } + @Override - public boolean isLocked() { - synchronized (this) { - return wlock || rlock > 0; - } + public String toString() { + return this.runnables.toString(); } + } - public boolean tryRead(final TableLockManager lockManager, - final TableName tableName, final String purpose) { - synchronized (this) { - if (wlock) { - return false; - } + /** + * Run Queue for Server procedures. + */ + private static class ServerRunQueue extends AbstractRunQueue { + public ServerRunQueue(int priority) { + super(priority); + } + } - // Take zk-read-lock - tableLock = lockManager.readLock(tableName, purpose); - try { - tableLock.acquire(); - } catch (IOException e) { - LOG.error("failed acquire read lock on " + tableName, e); - tableLock = null; - return false; - } + /** + * Run Queue for a Table. It contains a read-write lock that is used by the + * MasterProcedureQueue to decide if we should fetch an item from this queue + * or skip to another one which will be able to run without waiting for locks. + */ + private static class TableRunQueue extends AbstractRunQueue { + private TableLock tableLock = null; - rlock++; + public TableRunQueue(int priority) { + super(priority); + } + + // TODO: Improve run-queue push with TableProcedureInterface.getType() + // we can take smart decisions based on the type of the operation (e.g. 
create/delete) + @Override + public void addBack(final Procedure proc) { + super.addBack(proc); + } + + public synchronized boolean trySharedLock(final TableLockManager lockManager, + final TableName tableName, final String purpose) { + if (isExclusiveLock()) return false; + + // Take zk-read-lock + tableLock = lockManager.readLock(tableName, purpose); + try { + tableLock.acquire(); + } catch (IOException e) { + LOG.error("failed acquire read lock on " + tableName, e); + tableLock = null; + return false; } + trySharedLock(); return true; } - public void releaseRead(final TableLockManager lockManager, + public synchronized void releaseSharedLock(final TableLockManager lockManager, final TableName tableName) { - synchronized (this) { - releaseTableLock(lockManager, rlock == 1); - rlock--; - } + releaseTableLock(lockManager, isSingleSharedLock()); + releaseSharedLock(); } - public boolean tryWrite(final TableLockManager lockManager, + public synchronized boolean tryExclusiveLock(final TableLockManager lockManager, final TableName tableName, final String purpose) { - synchronized (this) { - if (wlock || rlock > 0) { - return false; - } - - // Take zk-write-lock - tableLock = lockManager.writeLock(tableName, purpose); - try { - tableLock.acquire(); - } catch (IOException e) { - LOG.error("failed acquire write lock on " + tableName, e); - tableLock = null; - return false; - } - wlock = true; + if (isLocked()) return false; + // Take zk-write-lock + tableLock = lockManager.writeLock(tableName, purpose); + try { + tableLock.acquire(); + } catch (IOException e) { + LOG.error("failed acquire write lock on " + tableName, e); + tableLock = null; + return false; } + tryExclusiveLock(); return true; } - public void releaseWrite(final TableLockManager lockManager, + public synchronized void releaseExclusiveLock(final TableLockManager lockManager, final TableName tableName) { - synchronized (this) { - releaseTableLock(lockManager, true); - wlock = false; - } + releaseTableLock(lockManager, true); + releaseExclusiveLock(); } private void releaseTableLock(final TableLockManager lockManager, boolean reset) { @@ -434,15 +570,5 @@ public class MasterProcedureQueue implements ProcedureRunnableSet { } } } - - @Override - public int getPriority() { - return priority; - } - - @Override - public String toString() { - return runnables.toString(); - } } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java index 3de5202..10ad91a 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyColumnFamilyProcedure.java @@ -182,14 +182,14 @@ public class ModifyColumnFamilyProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite( + return env.getProcedureQueue().tryAcquireTableExclusiveLock( tableName, EventType.C_M_MODIFY_FAMILY.toString()); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(tableName); + env.getProcedureQueue().releaseTableExclusiveLock(tableName); } @Override @@ -379,4 +379,4 @@ public class ModifyColumnFamilyProcedure }); } } -} +} \ No newline at end of file diff --git 
a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java index e9636e6..4e73e77 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ModifyTableProcedure.java @@ -214,14 +214,14 @@ public class ModifyTableProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite( + return env.getProcedureQueue().tryAcquireTableExclusiveLock( getTableName(), EventType.C_M_MODIFY_TABLE.toString()); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(getTableName()); + env.getProcedureQueue().releaseTableExclusiveLock(getTableName()); } @Override @@ -507,4 +507,4 @@ public class ModifyTableProcedure } return regionInfoList; } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java new file mode 100644 index 0000000..9d88d42 --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerCrashProcedure.java @@ -0,0 +1,763 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
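For readers skimming the MasterProcedureQueue refactor earlier in this patch: the new AbstractRunQueue replaces the old wlock/rlock fields with an exclusive flag plus a shared counter, and TableRunQueue layers the ZooKeeper table lock on top of that. Below is a minimal, standalone sketch of just the lock accounting; the class and method names are illustrative and not part of the patch.

// Illustrative only: stripped-down model of the shared/exclusive accounting introduced
// by AbstractRunQueue. The real queue also holds the runnables deque and, for tables,
// a ZooKeeper table lock.
public class RunQueueLockSketch {
  private boolean exclusiveLock = false;
  private int sharedLock = 0;

  public synchronized boolean isLocked() {
    return exclusiveLock || sharedLock > 0;
  }

  // A shared (read) lock is granted unless an exclusive holder exists.
  public synchronized boolean trySharedLock() {
    if (exclusiveLock) return false;
    sharedLock++;
    return true;
  }

  public synchronized void releaseSharedLock() {
    sharedLock--;
  }

  // The exclusive (write) lock is granted only when nothing else holds the queue.
  public synchronized boolean tryExclusiveLock() {
    if (isLocked()) return false;
    exclusiveLock = true;
    return true;
  }

  public synchronized void releaseExclusiveLock() {
    exclusiveLock = false;
  }

  public static void main(String[] args) {
    RunQueueLockSketch q = new RunQueueLockSketch();
    System.out.println(q.trySharedLock());     // true: first reader gets in
    System.out.println(q.tryExclusiveLock());  // false: a reader is still out
    q.releaseSharedLock();
    System.out.println(q.tryExclusiveLock());  // true: queue is now free
  }
}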
+ */ +package org.apache.hadoop.hbase.master.procedure; + +import java.io.IOException; +import java.io.InputStream; +import java.io.InterruptedIOException; +import java.io.OutputStream; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashSet; +import java.util.List; +import java.util.Set; +import java.util.concurrent.locks.Lock; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.hbase.HConstants; +import org.apache.hadoop.hbase.HRegionInfo; +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.client.ClusterConnection; +import org.apache.hadoop.hbase.client.RegionReplicaUtil; +import org.apache.hadoop.hbase.master.AssignmentManager; +import org.apache.hadoop.hbase.master.MasterFileSystem; +import org.apache.hadoop.hbase.master.MasterServices; +import org.apache.hadoop.hbase.master.RegionState; +import org.apache.hadoop.hbase.master.RegionStates; +import org.apache.hadoop.hbase.procedure2.ProcedureYieldException; +import org.apache.hadoop.hbase.procedure2.StateMachineProcedure; +import org.apache.hadoop.hbase.protobuf.ProtobufUtil; +import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.RegionInfo; +import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos; +import org.apache.hadoop.hbase.protobuf.generated.MasterProcedureProtos.ServerCrashState; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos; +import org.apache.hadoop.hbase.protobuf.generated.ZooKeeperProtos.SplitLogTask.RecoveryMode; +import org.apache.hadoop.hbase.zookeeper.MetaTableLocator; +import org.apache.hadoop.hbase.zookeeper.ZKAssign; +import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; +import org.apache.hadoop.util.StringUtils; +import org.apache.zookeeper.KeeperException; + +/** + * Handle crashed server. This is a port to ProcedureV2 of what used to be euphemistically called + * ServerShutdownHandler. + * + *

    The procedure flow varies depending on whether meta is assigned, whether we are + * doing distributed log replay versus distributed log splitting, and whether we are to split logs at + * all. + * + *

    This procedure asks that all crashed servers get processed equally; we yield after the + * completion of each successful flow step. We do this so that we do not 'deadlock' waiting on + * a region assignment needed before we can replay edits, which could happen if a region moved and + * there are edits on two servers to replay. + * + *

    TODO: ASSIGN and WAIT_ON_ASSIGN (at least) are not idempotent. Revisit when assign is pv2. + * TODO: We do not have special handling for system tables. + */ +public class ServerCrashProcedure +extends StateMachineProcedure +implements ServerProcedureInterface { + private static final Log LOG = LogFactory.getLog(ServerCrashProcedure.class); + + /** + * Configuration key to set how long to wait in ms doing a quick check on meta state. + */ + public static final String KEY_SHORT_WAIT_ON_META = + "hbase.master.servercrash.short.wait.on.meta.ms"; + + public static final int DEFAULT_SHORT_WAIT_ON_META = 1000; + + /** + * Configuration key to set how many retries to cycle before we give up on meta. + * Each attempt will wait at least {@link #KEY_SHORT_WAIT_ON_META} milliseconds. + */ + public static final String KEY_RETRIES_ON_META = + "hbase.master.servercrash.meta.retries"; + + public static final int DEFAULT_RETRIES_ON_META = 10; + + /** + * Configuration key to set how long to wait in ms on regions in transition. + */ + public static final String KEY_WAIT_ON_RIT = + "hbase.master.servercrash.wait.on.rit.ms"; + + public static final int DEFAULT_WAIT_ON_RIT = 30000; + + private static final Set META_REGION_SET = new HashSet(); + static { + META_REGION_SET.add(HRegionInfo.FIRST_META_REGIONINFO); + } + + /** + * Name of the crashed server to process. + */ + private ServerName serverName; + + /** + * Regions that were on the crashed server. + */ + private Set regionsOnCrashedServer; + + /** + * Regions to assign. Usually some subset of {@link #regionsOnCrashedServer} + */ + private List regionsToAssign; + + private boolean distributedLogReplay = false; + private boolean carryingMeta = false; + private boolean shouldSplitWal; + + /** + * Cycles on same state. Good for figuring if we are stuck. + */ + private int cycles = 0; + + /** + * Ordinal of the previous state. So we can tell if we are progressing or not. TODO: if useful, + * move this back up into StateMachineProcedure + */ + private int previousState; + + /** + * Call this constructor queuing up a Procedure. + * @param serverName Name of the crashed server. + * @param shouldSplitWal True if we should split WALs as part of crashed server processing. + * @param carryingMeta True if carrying hbase:meta table region. + */ + public ServerCrashProcedure(final ServerName serverName, + final boolean shouldSplitWal, final boolean carryingMeta) { + this.serverName = serverName; + this.shouldSplitWal = shouldSplitWal; + this.carryingMeta = carryingMeta; + // Currently not used. + } + + /** + * Used when deserializing from a procedure store; we'll construct one of these then call + * {@link #deserializeStateData(InputStream)}. Do not use directly. + */ + public ServerCrashProcedure() { + super(); + } + + private void throwProcedureYieldException(final String msg) throws ProcedureYieldException { + String logMsg = msg + "; cycle=" + this.cycles + ", running for " + + StringUtils.formatTimeDiff(System.currentTimeMillis(), getStartTime()); + // The procedure executor logs ProcedureYieldException at trace level. For now, log these + // yields for server crash processing at DEBUG. Revisit when stable. 
+ if (LOG.isDebugEnabled()) LOG.debug(logMsg); + throw new ProcedureYieldException(logMsg); + } + + @Override + protected Flow executeFromState(MasterProcedureEnv env, ServerCrashState state) + throws ProcedureYieldException { + if (LOG.isTraceEnabled()) { + LOG.trace(state); + } + // Keep running count of cycles + if (state.ordinal() != this.previousState) { + this.previousState = state.ordinal(); + this.cycles = 0; + } else { + this.cycles++; + } + MasterServices services = env.getMasterServices(); + try { + switch (state) { + case SERVER_CRASH_START: + // Is master fully online? If not, yield. No processing of servers unless master is up + if (!services.getAssignmentManager().isFailoverCleanupDone()) { + throwProcedureYieldException("Waiting on master failover to complete"); + } + LOG.info("Start processing crashed " + this.serverName); + start(env); + // If carrying meta, process it first. Else, get list of regions on crashed server. + if (this.carryingMeta) setNextState(ServerCrashState.SERVER_CRASH_PROCESS_META); + else setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS); + break; + + case SERVER_CRASH_GET_REGIONS: + // If hbase:meta is not assigned, yield. + if (!isMetaAssignedQuickTest(env)) { + throwProcedureYieldException("Waiting on hbase:meta assignment"); + } + this.regionsOnCrashedServer = + services.getAssignmentManager().getRegionStates().getServerRegions(this.serverName); + // Where to go next? Depends on whether we should split logs at all or if we should do + // distributed log splitting (DLS) vs distributed log replay (DLR). + if (!this.shouldSplitWal) { + setNextState(ServerCrashState.SERVER_CRASH_CALC_REGIONS_TO_ASSIGN); + } else if (this.distributedLogReplay) { + setNextState(ServerCrashState.SERVER_CRASH_PREPARE_LOG_REPLAY); + } else { + setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS); + } + break; + + case SERVER_CRASH_PROCESS_META: + // If we fail processing hbase:meta, yield. + if (!processMeta(env)) { + throwProcedureYieldException("Waiting on regions-in-transition to clear"); + } + setNextState(ServerCrashState.SERVER_CRASH_GET_REGIONS); + break; + + case SERVER_CRASH_PREPARE_LOG_REPLAY: + prepareLogReplay(env, this.regionsOnCrashedServer); + setNextState(ServerCrashState.SERVER_CRASH_CALC_REGIONS_TO_ASSIGN); + break; + + case SERVER_CRASH_SPLIT_LOGS: + splitLogs(env); + // If DLR, go to FINISH. Otherwise, if DLS, go to SERVER_CRASH_CALC_REGIONS_TO_ASSIGN + if (this.distributedLogReplay) setNextState(ServerCrashState.SERVER_CRASH_FINISH); + else setNextState(ServerCrashState.SERVER_CRASH_CALC_REGIONS_TO_ASSIGN); + break; + + case SERVER_CRASH_CALC_REGIONS_TO_ASSIGN: + this.regionsToAssign = calcRegionsToAssign(env); + setNextState(ServerCrashState.SERVER_CRASH_ASSIGN); + break; + + case SERVER_CRASH_ASSIGN: + // Assign may not be idempotent. SSH used to requeue the SSH if we got an IOE assigning + // which is what we are mimicing here but it looks prone to double assignment if assign + // fails midway. TODO: Test. + + // If no regions to assign, skip assign and skip to the finish. 
+ boolean regions = this.regionsToAssign != null && !this.regionsToAssign.isEmpty(); + if (regions) { + if (!assign(env, this.regionsToAssign)) { + throwProcedureYieldException("Failed assign; will retry"); + } + } + if (regions && this.shouldSplitWal && distributedLogReplay) { + setNextState(ServerCrashState.SERVER_CRASH_WAIT_ON_ASSIGN); + } else { + setNextState(ServerCrashState.SERVER_CRASH_FINISH); + } + break; + + case SERVER_CRASH_WAIT_ON_ASSIGN: + // TODO: The list of regionsToAssign may be more than we actually assigned. See down in + // AM #1629 around 'if (regionStates.wasRegionOnDeadServer(encodedName)) {' where where we + // will skip assigning a region because it is/was on a dead server. Should never happen! + // It was on this server. Worst comes to worst, we'll still wait here till other server is + // processed. + + // If the wait on assign failed, yield -- if we have regions to assign. + if (this.regionsToAssign != null && !this.regionsToAssign.isEmpty()) { + if (!waitOnAssign(env, this.regionsToAssign)) { + throwProcedureYieldException("Waiting on region assign"); + } + } + setNextState(ServerCrashState.SERVER_CRASH_SPLIT_LOGS); + break; + + case SERVER_CRASH_FINISH: + LOG.info("Finished processing of crashed " + serverName); + services.getServerManager().getDeadServers().finish(serverName); + return Flow.NO_MORE_STATE; + + default: + throw new UnsupportedOperationException("unhandled state=" + state); + } + } catch (IOException e) { + LOG.warn("Failed serverName=" + this.serverName + ", state=" + state + "; retry", e); + } catch (InterruptedException e) { + // TODO: Make executor allow IEs coming up out of execute. + LOG.warn("Interrupted serverName=" + this.serverName + ", state=" + state + "; retry", e); + Thread.currentThread().interrupt(); + } + return Flow.HAS_MORE_STATE; + } + + /** + * Start processing of crashed server. In here we'll just set configs. and return. + * @param env + * @throws IOException + */ + private void start(final MasterProcedureEnv env) throws IOException { + MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + // Set recovery mode late. This is what the old ServerShutdownHandler used do. + mfs.setLogRecoveryMode(); + this.distributedLogReplay = mfs.getLogRecoveryMode() == RecoveryMode.LOG_REPLAY; + } + + /** + * @param env + * @return False if we fail to assign and split logs on meta ('process'). + * @throws IOException + * @throws InterruptedException + */ + private boolean processMeta(final MasterProcedureEnv env) + throws IOException { + if (LOG.isDebugEnabled()) LOG.debug("Processing hbase:meta that was on " + this.serverName); + MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + AssignmentManager am = env.getMasterServices().getAssignmentManager(); + HRegionInfo metaHRI = HRegionInfo.FIRST_META_REGIONINFO; + if (this.shouldSplitWal) { + if (this.distributedLogReplay) { + prepareLogReplay(env, META_REGION_SET); + } else { + // TODO: Matteo. We BLOCK here but most important thing to be doing at this moment. + mfs.splitMetaLog(serverName); + am.getRegionStates().logSplit(metaHRI); + } + } + + // Assign meta if still carrying it. Check again: region may be assigned because of RIT timeout + boolean processed = true; + if (am.isCarryingMeta(serverName)) { + // TODO: May block here if hard time figuring state of meta. 
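As an aid to reading the long switch in executeFromState above, the transitions it encodes can be collapsed into a small standalone sketch. State names mirror the ServerCrashState values used in the patch and the flags mirror the procedure fields (carryingMeta, shouldSplitWal, distributedLogReplay) plus whether any regions need assigning; the class itself is illustrative and not part of the change.

// Illustrative summary of the ServerCrashProcedure state flow; not part of the patch.
public class ServerCrashFlowSketch {
  enum State { START, PROCESS_META, GET_REGIONS, PREPARE_LOG_REPLAY, SPLIT_LOGS,
    CALC_REGIONS_TO_ASSIGN, ASSIGN, WAIT_ON_ASSIGN, FINISH }

  static State next(State s, boolean carryingMeta, boolean splitWal, boolean dlr,
      boolean hasRegionsToAssign) {
    switch (s) {
      case START:              return carryingMeta ? State.PROCESS_META : State.GET_REGIONS;
      case PROCESS_META:       return State.GET_REGIONS;
      case GET_REGIONS:
        if (!splitWal) return State.CALC_REGIONS_TO_ASSIGN;
        return dlr ? State.PREPARE_LOG_REPLAY : State.SPLIT_LOGS;
      case PREPARE_LOG_REPLAY: return State.CALC_REGIONS_TO_ASSIGN;
      case SPLIT_LOGS:         return dlr ? State.FINISH : State.CALC_REGIONS_TO_ASSIGN;
      case CALC_REGIONS_TO_ASSIGN: return State.ASSIGN;
      case ASSIGN:
        return (hasRegionsToAssign && splitWal && dlr) ? State.WAIT_ON_ASSIGN : State.FINISH;
      case WAIT_ON_ASSIGN:     return State.SPLIT_LOGS;
      default:                 return State.FINISH;  // FINISH is terminal
    }
  }

  public static void main(String[] args) {
    // Distributed log splitting (no DLR), meta not carried: START -> GET_REGIONS ->
    // SPLIT_LOGS -> CALC_REGIONS_TO_ASSIGN -> ASSIGN -> FINISH.
    State s = State.START;
    while (s != State.FINISH) {
      System.out.println(s);
      s = next(s, false, true, false, true);
    }
  }
}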
+ am.regionOffline(HRegionInfo.FIRST_META_REGIONINFO); + verifyAndAssignMetaWithRetries(env); + if (this.shouldSplitWal && distributedLogReplay) { + int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT); + if (!waitOnRegionToClearRegionsInTransition(am, metaHRI, timeout)) { + processed = false; + } else { + // TODO: Matteo. We BLOCK here but most important thing to be doing at this moment. + mfs.splitMetaLog(serverName); + } + } + } + return processed; + } + + /** + * @return True if region cleared RIT, else false if we timed out waiting. + * @throws InterruptedIOException + */ + private boolean waitOnRegionToClearRegionsInTransition(AssignmentManager am, + final HRegionInfo hri, final int timeout) + throws InterruptedIOException { + try { + if (!am.waitOnRegionToClearRegionsInTransition(hri, timeout)) { + // Wait here is to avoid log replay hits current dead server and incur a RPC timeout + // when replay happens before region assignment completes. + LOG.warn("Region " + hri.getEncodedName() + " didn't complete assignment in time"); + return false; + } + } catch (InterruptedException ie) { + throw new InterruptedIOException("Caught " + ie + + " during waitOnRegionToClearRegionsInTransition for " + hri); + } + return true; + } + + private void prepareLogReplay(final MasterProcedureEnv env, final Set regions) + throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Mark " + size(this.regionsOnCrashedServer) + + " regions-in-recovery from " + this.serverName); + } + MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + AssignmentManager am = env.getMasterServices().getAssignmentManager(); + mfs.prepareLogReplay(this.serverName, regions); + am.getRegionStates().logSplit(this.serverName); + } + + private void splitLogs(final MasterProcedureEnv env) throws IOException { + if (LOG.isDebugEnabled()) { + LOG.debug("Splitting logs from " + serverName + "; region count=" + + size(this.regionsOnCrashedServer)); + } + MasterFileSystem mfs = env.getMasterServices().getMasterFileSystem(); + AssignmentManager am = env.getMasterServices().getAssignmentManager(); + // TODO: For Matteo. Below BLOCKs!!!! Redo so can relinquish executor while it is running. + mfs.splitLog(this.serverName); + am.getRegionStates().logSplit(this.serverName); + } + + static int size(final Collection hris) { + return hris == null? 0: hris.size(); + } + + /** + * Figure out what we need to assign. Should be idempotent. + * @param env + * @return List of calculated regions to assign; may be empty or null. + * @throws IOException + */ + private List calcRegionsToAssign(final MasterProcedureEnv env) + throws IOException { + if (this.regionsOnCrashedServer == null || this.regionsOnCrashedServer.isEmpty()) return null; + AssignmentManager am = env.getMasterServices().getAssignmentManager(); + List regionsToAssignAggregator = new ArrayList(); + int replicaCount = env.getMasterConfiguration().getInt(HConstants.META_REPLICAS_NUM, + HConstants.DEFAULT_META_REPLICA_NUM); + for (int i = 1; i < replicaCount; i++) { + HRegionInfo metaHri = + RegionReplicaUtil.getRegionInfoForReplica(HRegionInfo.FIRST_META_REGIONINFO, i); + if (am.isCarryingMetaReplica(this.serverName, metaHri)) { + if (LOG.isDebugEnabled()) { + LOG.debug("Reassigning meta replica" + metaHri + " that was on " + this.serverName); + } + regionsToAssignAggregator.add(metaHri); + } + } + // Clean out anything in regions in transition. 
+ List regionsInTransition = am.cleanOutCrashedServerReferences(serverName); + if (LOG.isDebugEnabled()) { + LOG.debug("Reassigning " + size(this.regionsOnCrashedServer) + + " region(s) that " + (serverName == null? "null": serverName) + + " was carrying (and " + regionsInTransition.size() + + " regions(s) that were opening on this server)"); + } + regionsToAssignAggregator.addAll(regionsInTransition); + + // Iterate regions that were on this server and figure which of these we need to reassign + if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) { + RegionStates regionStates = am.getRegionStates(); + for (HRegionInfo hri: this.regionsOnCrashedServer) { + if (regionsInTransition.contains(hri)) continue; + String encodedName = hri.getEncodedName(); + Lock lock = am.acquireRegionLock(encodedName); + try { + RegionState rit = regionStates.getRegionTransitionState(hri); + if (processDeadRegion(hri, am)) { + ServerName addressFromAM = regionStates.getRegionServerOfRegion(hri); + if (addressFromAM != null && !addressFromAM.equals(this.serverName)) { + // If this region is in transition on the dead server, it must be + // opening or pending_open, which should have been covered by + // AM#cleanOutCrashedServerReferences + LOG.info("Skip assigning region " + hri.getRegionNameAsString() + + " because it has been opened in " + addressFromAM.getServerName()); + continue; + } + if (rit != null) { + if (rit.getServerName() != null && !rit.isOnServer(this.serverName)) { + // Skip regions that are in transition on other server + LOG.info("Skip assigning region in transition on other server" + rit); + continue; + } + LOG.info("Reassigning region " + rit + " and clearing zknode if exists"); + try { + // This clears out any RIT that might be sticking around. + ZKAssign.deleteNodeFailSilent(env.getMasterServices().getZooKeeper(), hri); + } catch (KeeperException e) { + // TODO: FIX!!!! ABORTING SERVER BECAUSE COULDN"T PURGE ZNODE. This is what we + // used to do but that doesn't make it right!!! + env.getMasterServices().abort("Unexpected error deleting RIT " + hri, e); + throw new IOException(e); + } + regionStates.updateRegionState(hri, RegionState.State.OFFLINE); + } else if (regionStates.isRegionInState( + hri, RegionState.State.SPLITTING_NEW, RegionState.State.MERGING_NEW)) { + regionStates.updateRegionState(hri, RegionState.State.OFFLINE); + } + regionsToAssignAggregator.add(hri); + // TODO: The below else if is different in branch-1 from master branch. + } else if (rit != null) { + if ((rit.isPendingCloseOrClosing() || rit.isOffline()) + && am.getTableStateManager().isTableState(hri.getTable(), + ZooKeeperProtos.Table.State.DISABLED, ZooKeeperProtos.Table.State.DISABLING) || + am.getReplicasToClose().contains(hri)) { + // If the table was partially disabled and the RS went down, we should clear the + // RIT and remove the node for the region. + // The rit that we use may be stale in case the table was in DISABLING state + // but though we did assign we will not be clearing the znode in CLOSING state. + // Doing this will have no harm. 
See HBASE-5927 + regionStates.updateRegionState(hri, RegionState.State.OFFLINE); + am.deleteClosingOrClosedNode(hri, rit.getServerName()); + am.offlineDisabledRegion(hri); + } else { + LOG.warn("THIS SHOULD NOT HAPPEN: unexpected region in transition " + + rit + " not to be assigned by SSH of server " + serverName); + } + } + } finally { + lock.unlock(); + } + } + } + return regionsToAssignAggregator; + } + + private boolean assign(final MasterProcedureEnv env, final List hris) + throws InterruptedIOException { + AssignmentManager am = env.getMasterServices().getAssignmentManager(); + try { + am.assign(hris); + } catch (InterruptedException ie) { + LOG.error("Caught " + ie + " during round-robin assignment"); + throw (InterruptedIOException)new InterruptedIOException().initCause(ie); + } catch (IOException ioe) { + LOG.info("Caught " + ioe + " during region assignment, will retry"); + return false; + } + return true; + } + + private boolean waitOnAssign(final MasterProcedureEnv env, final List hris) + throws InterruptedIOException { + int timeout = env.getMasterConfiguration().getInt(KEY_WAIT_ON_RIT, DEFAULT_WAIT_ON_RIT); + for (HRegionInfo hri: hris) { + // TODO: Blocks here. + if (!waitOnRegionToClearRegionsInTransition(env.getMasterServices().getAssignmentManager(), + hri, timeout)) { + return false; + } + } + return true; + } + + @Override + protected void rollbackState(MasterProcedureEnv env, ServerCrashState state) + throws IOException { + // Can't rollback. + throw new UnsupportedOperationException("unhandled state=" + state); + } + + @Override + protected ServerCrashState getState(int stateId) { + return ServerCrashState.valueOf(stateId); + } + + @Override + protected int getStateId(ServerCrashState state) { + return state.getNumber(); + } + + @Override + protected ServerCrashState getInitialState() { + return ServerCrashState.SERVER_CRASH_START; + } + + @Override + protected boolean abort(MasterProcedureEnv env) { + // TODO + return false; + } + + @Override + protected boolean acquireLock(final MasterProcedureEnv env) { + if (!env.getMasterServices().isServerCrashProcessingEnabled()) return false; + return env.getProcedureQueue().tryAcquireServerExclusiveLock(this); + } + + @Override + protected void releaseLock(final MasterProcedureEnv env) { + env.getProcedureQueue().releaseServerExclusiveLock(this); + } + + @Override + public void toStringClassDetails(StringBuilder sb) { + sb.append(getClass().getSimpleName()); + sb.append(" serverName="); + sb.append(this.serverName); + sb.append(", shouldSplitWal="); + sb.append(shouldSplitWal); + sb.append(", carryingMeta="); + sb.append(carryingMeta); + } + + @Override + public void serializeStateData(final OutputStream stream) throws IOException { + super.serializeStateData(stream); + + MasterProcedureProtos.ServerCrashStateData.Builder state = + MasterProcedureProtos.ServerCrashStateData.newBuilder(). + setServerName(ProtobufUtil.toServerName(this.serverName)). + setDistributedLogReplay(this.distributedLogReplay). + setCarryingMeta(this.carryingMeta). 
+ setShouldSplitWal(this.shouldSplitWal); + if (this.regionsOnCrashedServer != null && !this.regionsOnCrashedServer.isEmpty()) { + for (HRegionInfo hri: this.regionsOnCrashedServer) { + state.addRegionsOnCrashedServer(HRegionInfo.convert(hri)); + } + } + if (this.regionsToAssign != null && !this.regionsToAssign.isEmpty()) { + for (HRegionInfo hri: this.regionsToAssign) { + state.addRegionsToAssign(HRegionInfo.convert(hri)); + } + } + state.build().writeDelimitedTo(stream); + } + + @Override + public void deserializeStateData(final InputStream stream) throws IOException { + super.deserializeStateData(stream); + + MasterProcedureProtos.ServerCrashStateData state = + MasterProcedureProtos.ServerCrashStateData.parseDelimitedFrom(stream); + this.serverName = ProtobufUtil.toServerName(state.getServerName()); + this.distributedLogReplay = state.hasDistributedLogReplay()? + state.getDistributedLogReplay(): false; + this.carryingMeta = state.hasCarryingMeta()? state.getCarryingMeta(): false; + // shouldSplitWAL has a default over in pb so this invocation will always work. + this.shouldSplitWal = state.getShouldSplitWal(); + int size = state.getRegionsOnCrashedServerCount(); + if (size > 0) { + this.regionsOnCrashedServer = new HashSet(size); + for (RegionInfo ri: state.getRegionsOnCrashedServerList()) { + this.regionsOnCrashedServer.add(HRegionInfo.convert(ri)); + } + } + size = state.getRegionsToAssignCount(); + if (size > 0) { + this.regionsToAssign = new ArrayList(size); + for (RegionInfo ri: state.getRegionsOnCrashedServerList()) { + this.regionsToAssign.add(HRegionInfo.convert(ri)); + } + } + } + + /** + * Process a dead region from a dead RS. Checks if the region is disabled or + * disabling or if the region has a partially completed split. + * @param hri + * @param assignmentManager + * @return Returns true if specified region should be assigned, false if not. + * @throws IOException + */ + private static boolean processDeadRegion(HRegionInfo hri, AssignmentManager assignmentManager) + throws IOException { + boolean tablePresent = assignmentManager.getTableStateManager().isTablePresent(hri.getTable()); + if (!tablePresent) { + LOG.info("The table " + hri.getTable() + " was deleted. Hence not proceeding."); + return false; + } + // If table is not disabled but the region is offlined, + boolean disabled = assignmentManager.getTableStateManager().isTableState(hri.getTable(), + ZooKeeperProtos.Table.State.DISABLED); + if (disabled){ + LOG.info("The table " + hri.getTable() + " was disabled. Hence not proceeding."); + return false; + } + if (hri.isOffline() && hri.isSplit()) { + // HBASE-7721: Split parent and daughters are inserted into hbase:meta as an atomic operation. + // If the meta scanner saw the parent split, then it should see the daughters as assigned + // to the dead server. We don't have to do anything. + return false; + } + boolean disabling = assignmentManager.getTableStateManager().isTableState(hri.getTable(), + ZooKeeperProtos.Table.State.DISABLING); + if (disabling) { + LOG.info("The table " + hri.getTable() + " is disabled. Hence not assigning region" + + hri.getEncodedName()); + return false; + } + return true; + } + + /** + * If hbase:meta is not assigned already, assign. 
+ * @throws IOException + */ + private void verifyAndAssignMetaWithRetries(final MasterProcedureEnv env) throws IOException { + MasterServices services = env.getMasterServices(); + int iTimes = services.getConfiguration().getInt(KEY_RETRIES_ON_META, DEFAULT_RETRIES_ON_META); + // Just reuse same time as we have for short wait on meta. Adding another config is overkill. + long waitTime = + services.getConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META); + int iFlag = 0; + while (true) { + try { + verifyAndAssignMeta(env); + break; + } catch (KeeperException e) { + services.abort("In server shutdown processing, assigning meta", e); + throw new IOException("Aborting", e); + } catch (Exception e) { + if (iFlag >= iTimes) { + services.abort("verifyAndAssignMeta failed after" + iTimes + " retries, aborting", e); + throw new IOException("Aborting", e); + } + try { + Thread.sleep(waitTime); + } catch (InterruptedException e1) { + LOG.warn("Interrupted when is the thread sleep", e1); + Thread.currentThread().interrupt(); + throw (InterruptedIOException)new InterruptedIOException().initCause(e1); + } + iFlag++; + } + } + } + + /** + * If hbase:meta is not assigned already, assign. + * @throws InterruptedException + * @throws IOException + * @throws KeeperException + */ + private void verifyAndAssignMeta(final MasterProcedureEnv env) + throws InterruptedException, IOException, KeeperException { + MasterServices services = env.getMasterServices(); + if (!isMetaAssignedQuickTest(env)) { + services.getAssignmentManager().assignMeta(HRegionInfo.FIRST_META_REGIONINFO); + } else if (serverName.equals(services.getMetaTableLocator(). + getMetaRegionLocation(services.getZooKeeper()))) { + throw new IOException("hbase:meta is onlined on the dead server " + this.serverName); + } else { + LOG.info("Skip assigning hbase:meta because it is online at " + + services.getMetaTableLocator().getMetaRegionLocation(services.getZooKeeper())); + } + } + + /** + * A quick test that hbase:meta is assigned; blocks for short time only. + * @return True if hbase:meta location is available and verified as good. + * @throws InterruptedException + * @throws IOException + */ + private boolean isMetaAssignedQuickTest(final MasterProcedureEnv env) + throws InterruptedException, IOException { + ZooKeeperWatcher zkw = env.getMasterServices().getZooKeeper(); + MetaTableLocator mtl = env.getMasterServices().getMetaTableLocator(); + boolean metaAssigned = false; + // Is hbase:meta location available yet? + if (mtl.isLocationAvailable(zkw)) { + ClusterConnection connection = env.getMasterServices().getConnection(); + // Is hbase:meta location good yet? + long timeout = + env.getMasterConfiguration().getLong(KEY_SHORT_WAIT_ON_META, DEFAULT_SHORT_WAIT_ON_META); + if (mtl.verifyMetaRegionLocation(connection, zkw, timeout)) { + metaAssigned = true; + } + } + return metaAssigned; + } + + @Override + public ServerName getServerName() { + return this.serverName; + } + + @Override + public boolean hasMetaTableRegion() { + return this.carryingMeta; + } + + /** + * For this procedure, yield at end of each successful flow step so that all crashed servers + * can make progress rather than do the default which has each procedure running to completion + * before we move to the next. For crashed servers, especially if running with distributed log + * replay, we will want all servers to come along; we do not want the scenario where a server is + * stuck waiting for regions to online so it can replay edits. 
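The override that follows (isYieldAfterSuccessfulFlowStateStep returning true) is the mechanism this comment describes. As a hedged, self-contained sketch of the scheduling effect, the toy loop below stands in for the real ProcedureExecutor; the interface and class names are invented for illustration only.

// Illustrative only: shows why yielding after each step interleaves crashed-server work.
import java.util.ArrayDeque;
import java.util.Deque;

public class YieldSketch {
  interface StepProcedure {
    /** @return true when the procedure has no more steps. */
    boolean executeOneStep();
    String name();
  }

  static class CrashedServerWork implements StepProcedure {
    private final String name;
    private int remainingSteps;
    CrashedServerWork(String name, int steps) { this.name = name; this.remainingSteps = steps; }
    public boolean executeOneStep() { remainingSteps--; return remainingSteps == 0; }
    public String name() { return name; }
  }

  public static void main(String[] args) {
    Deque<StepProcedure> runQueue = new ArrayDeque<>();
    runQueue.add(new CrashedServerWork("server-a", 3));
    runQueue.add(new CrashedServerWork("server-b", 3));
    // Because each procedure yields after a successful step, it goes to the back of the
    // queue, so both crashed servers make progress instead of one running to completion
    // while the other waits (possibly for assignments the first one is blocked on).
    while (!runQueue.isEmpty()) {
      StepProcedure p = runQueue.poll();
      System.out.println("step on " + p.name());
      if (!p.executeOneStep()) runQueue.addLast(p);
    }
  }
}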
+ */ + @Override + protected boolean isYieldAfterSuccessfulFlowStateStep() { + return true; + } +} diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java new file mode 100644 index 0000000..5b0c45f --- /dev/null +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/ServerProcedureInterface.java @@ -0,0 +1,40 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hbase.master.procedure; + +import org.apache.hadoop.hbase.ServerName; +import org.apache.hadoop.hbase.classification.InterfaceAudience; +import org.apache.hadoop.hbase.classification.InterfaceStability; + +/** + * Procedures that handle servers -- e.g. server crash -- must implement this Interface. + * It is used by the procedure runner to figure locking and what queuing. + */ +@InterfaceAudience.Private +@InterfaceStability.Evolving +public interface ServerProcedureInterface { + /** + * @return Name of this server instance. + */ + ServerName getServerName(); + + /** + * @return True if this server has an hbase:meta table region. + */ + boolean hasMetaTableRegion(); +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java index 6928d02..cc088f3 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TableProcedureInterface.java @@ -45,4 +45,4 @@ public interface TableProcedureInterface { * @return the operation type that the procedure is executing. 
*/ TableOperationType getTableOperationType(); -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java index c69bd8f..0300c89 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/master/procedure/TruncateTableProcedure.java @@ -183,12 +183,12 @@ public class TruncateTableProcedure @Override protected boolean acquireLock(final MasterProcedureEnv env) { if (!env.isInitialized()) return false; - return env.getProcedureQueue().tryAcquireTableWrite(getTableName(), "truncate table"); + return env.getProcedureQueue().tryAcquireTableExclusiveLock(getTableName(), "truncate table"); } @Override protected void releaseLock(final MasterProcedureEnv env) { - env.getProcedureQueue().releaseTableWrite(getTableName()); + env.getProcedureQueue().releaseTableExclusiveLock(getTableName()); } @Override @@ -287,4 +287,4 @@ public class TruncateTableProcedure }); } } -} +} \ No newline at end of file diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java index 22fdc78..94a193f 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java @@ -351,7 +351,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi private boolean disallowWritesInRecovering = false; // when a region is in recovering state, it can only accept writes not reads - private volatile boolean isRecovering = false; + private volatile boolean recovering = false; private volatile Optional configurationManager; @@ -711,7 +711,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi Map recoveringRegions = rsServices.getRecoveringRegions(); String encodedName = getRegionInfo().getEncodedName(); if (recoveringRegions != null && recoveringRegions.containsKey(encodedName)) { - this.isRecovering = true; + this.recovering = true; recoveringRegions.put(encodedName, this); } } else { @@ -841,7 +841,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi // overlaps used sequence numbers if (this.writestate.writesEnabled) { nextSeqid = WALSplitter.writeRegionSequenceIdFile(this.fs.getFileSystem(), this.fs - .getRegionDir(), nextSeqid, (this.isRecovering ? (this.flushPerChanges + 10000000) : 1)); + .getRegionDir(), nextSeqid, (this.recovering ? (this.flushPerChanges + 10000000) : 1)); } else { nextSeqid++; } @@ -1153,7 +1153,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi * Reset recovering state of current region */ public void setRecovering(boolean newState) { - boolean wasRecovering = this.isRecovering; + boolean wasRecovering = this.recovering; // before we flip the recovering switch (enabling reads) we should write the region open // event to WAL if needed if (wal != null && getRegionServerServices() != null && !writestate.readOnly @@ -1194,8 +1194,8 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi } } - this.isRecovering = newState; - if (wasRecovering && !isRecovering) { + this.recovering = newState; + if (wasRecovering && !recovering) { // Call only when wal replay is over. 
coprocessorHost.postLogReplay(); } @@ -1203,7 +1203,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi @Override public boolean isRecovering() { - return this.isRecovering; + return this.recovering; } @Override @@ -6212,7 +6212,7 @@ public class HRegion implements HeapSize, PropagatingConfigurationObserver, Regi this.openSeqNum = initialize(reporter); this.setSequenceId(openSeqNum); if (wal != null && getRegionServerServices() != null && !writestate.readOnly - && !isRecovering) { + && !recovering) { // Only write the region open event marker to WAL if (1) we are not read-only // (2) dist log replay is off or we are not recovering. In case region is // recovering, the open event will be written at setRecovering(false) diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java index 0fffcc6..1360fb2 100644 --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/util/FSHDFSUtils.java @@ -170,7 +170,7 @@ public class FSHDFSUtils extends FSUtils { boolean recoverDFSFileLease(final DistributedFileSystem dfs, final Path p, final Configuration conf, final CancelableProgressable reporter) throws IOException { - LOG.info("Recovering lease on dfs file " + p); + LOG.info("Recover lease on dfs file " + p); long startWaiting = EnvironmentEdgeManager.currentTime(); // Default is 15 minutes. It's huge, but the idea is that if we have a major issue, HDFS // usually needs 10 minutes before marking the nodes as dead. So we're putting ourselves @@ -259,7 +259,7 @@ public class FSHDFSUtils extends FSUtils { boolean recovered = false; try { recovered = dfs.recoverLease(p); - LOG.info("recoverLease=" + recovered + ", " + + LOG.info((recovered? 
"Recovered lease, ": "Failed to recover lease, ") + getLogMessageDetail(nbAttempt, p, startWaiting)); } catch (IOException e) { if (e instanceof LeaseExpiredException && e.getMessage().contains("File does not exist")) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java index 9e8097e..ca2af24 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManager.java @@ -24,6 +24,8 @@ import static org.junit.Assert.assertTrue; import static org.junit.Assert.fail; import java.io.IOException; +import java.lang.reflect.InvocationTargetException; +import java.lang.reflect.Method; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -50,6 +52,7 @@ import org.apache.hadoop.hbase.ServerName; import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.ZooKeeperConnectionException; import org.apache.hadoop.hbase.client.ClusterConnection; +import org.apache.hadoop.hbase.client.HConnection; import org.apache.hadoop.hbase.client.HConnectionTestingUtility; import org.apache.hadoop.hbase.client.Result; import org.apache.hadoop.hbase.coordination.BaseCoordinatedStateManager; @@ -66,7 +69,9 @@ import org.apache.hadoop.hbase.master.TableLockManager.NullTableLockManager; import org.apache.hadoop.hbase.master.balancer.LoadBalancerFactory; import org.apache.hadoop.hbase.master.balancer.SimpleLoadBalancer; import org.apache.hadoop.hbase.master.handler.EnableTableHandler; -import org.apache.hadoop.hbase.master.handler.ServerShutdownHandler; +import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv; +import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure; +import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.protobuf.ProtobufUtil; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos; import org.apache.hadoop.hbase.protobuf.generated.ClientProtos.GetRequest; @@ -105,14 +110,20 @@ import com.google.protobuf.ServiceException; /** * Test {@link AssignmentManager} + * + * TODO: This test suite has rotted. It is too fragile. The smallest change throws it off. It is + * too brittle mocking up partial states in mockito trying to ensure we walk the right codepath + * to obtain expected result. Redo. */ @Category(MediumTests.class) public class TestAssignmentManager { private static final HBaseTestingUtility HTU = new HBaseTestingUtility(); - private static final ServerName SERVERNAME_A = - ServerName.valueOf("example.org", 1234, 5678); - private static final ServerName SERVERNAME_B = - ServerName.valueOf("example.org", 0, 5678); + // Let this be the server that is 'dead' in the tests below. + private static final ServerName SERVERNAME_DEAD = + ServerName.valueOf("dead.example.org", 1, 5678); + // This is the server that is 'live' in the tests below. + private static final ServerName SERVERNAME_LIVE = + ServerName.valueOf("live.example.org", 0, 5678); private static final HRegionInfo REGIONINFO = new HRegionInfo(TableName.valueOf("t"), HConstants.EMPTY_START_ROW, HConstants.EMPTY_START_ROW); @@ -177,12 +188,12 @@ public class TestAssignmentManager { // Mock a ServerManager. Say server SERVERNAME_{A,B} are online. Also // make it so if close or open, we return 'success'. 
this.serverManager = Mockito.mock(ServerManager.class); - Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true); - Mockito.when(this.serverManager.isServerOnline(SERVERNAME_B)).thenReturn(true); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(true); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_LIVE)).thenReturn(true); Mockito.when(this.serverManager.getDeadServers()).thenReturn(new DeadServer()); final Map onlineServers = new HashMap(); - onlineServers.put(SERVERNAME_B, ServerLoad.EMPTY_SERVERLOAD); - onlineServers.put(SERVERNAME_A, ServerLoad.EMPTY_SERVERLOAD); + onlineServers.put(SERVERNAME_LIVE, ServerLoad.EMPTY_SERVERLOAD); + onlineServers.put(SERVERNAME_DEAD, ServerLoad.EMPTY_SERVERLOAD); Mockito.when(this.serverManager.getOnlineServersList()).thenReturn( new ArrayList(onlineServers.keySet())); Mockito.when(this.serverManager.getOnlineServers()).thenReturn(onlineServers); @@ -192,14 +203,14 @@ public class TestAssignmentManager { Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(avServers); Mockito.when(this.serverManager.createDestinationServersList(null)).thenReturn(avServers); - Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, REGIONINFO, -1)). + Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_DEAD, REGIONINFO, -1)). thenReturn(true); - Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_B, REGIONINFO, -1)). + Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_LIVE, REGIONINFO, -1)). thenReturn(true); // Ditto on open. - Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_A, REGIONINFO, -1, null)). + Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_DEAD, REGIONINFO, -1, null)). thenReturn(RegionOpeningState.OPENED); - Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_B, REGIONINFO, -1, null)). + Mockito.when(this.serverManager.sendRegionOpen(SERVERNAME_LIVE, REGIONINFO, -1, null)). thenReturn(RegionOpeningState.OPENED); this.master = Mockito.mock(HMaster.class); @@ -231,13 +242,13 @@ public class TestAssignmentManager { AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server, this.serverManager); try { - createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO); + createRegionPlanAndBalance(am, SERVERNAME_DEAD, SERVERNAME_LIVE, REGIONINFO); startFakeFailedOverMasterAssignmentManager(am, this.watcher); while (!am.processRITInvoked) Thread.sleep(1); // As part of the failover cleanup, the balancing region plan is removed. // So a random server will be used to open the region. For testing purpose, // let's assume it is going to open on server b: - am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B)); + am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_LIVE)); Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO); @@ -247,7 +258,7 @@ public class TestAssignmentManager { // region handler duplicated here because its down deep in a private // method hard to expose. int versionid = - ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); + ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_DEAD, -1); assertNotSame(versionid, -1); Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); @@ -257,12 +268,12 @@ public class TestAssignmentManager { assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. 
versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, - SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE, + SERVERNAME_LIVE, EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, versionid); assertNotSame(-1, versionid); // Move znode from OPENING to OPENED as RS does on successful open. versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, - SERVERNAME_B, versionid); + SERVERNAME_LIVE, versionid); assertNotSame(-1, versionid); am.gate.set(false); // Block here until our znode is cleared or until this test times out. @@ -280,13 +291,13 @@ public class TestAssignmentManager { AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server, this.serverManager); try { - createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO); + createRegionPlanAndBalance(am, SERVERNAME_DEAD, SERVERNAME_LIVE, REGIONINFO); startFakeFailedOverMasterAssignmentManager(am, this.watcher); while (!am.processRITInvoked) Thread.sleep(1); // As part of the failover cleanup, the balancing region plan is removed. // So a random server will be used to open the region. For testing purpose, // let's assume it is going to open on server b: - am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B)); + am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_LIVE)); Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO); @@ -296,7 +307,7 @@ public class TestAssignmentManager { // region handler duplicated here because its down deep in a private // method hard to expose. int versionid = - ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); + ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_DEAD, -1); assertNotSame(versionid, -1); am.gate.set(false); Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); @@ -307,12 +318,12 @@ public class TestAssignmentManager { assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, - SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE, + SERVERNAME_LIVE, EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, versionid); assertNotSame(-1, versionid); // Move znode from OPENING to OPENED as RS does on successful open. versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, - SERVERNAME_B, versionid); + SERVERNAME_LIVE, versionid); assertNotSame(-1, versionid); // Block here until our znode is cleared or until this test timesout. @@ -330,13 +341,13 @@ public class TestAssignmentManager { AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server, this.serverManager); try { - createRegionPlanAndBalance(am, SERVERNAME_A, SERVERNAME_B, REGIONINFO); + createRegionPlanAndBalance(am, SERVERNAME_DEAD, SERVERNAME_LIVE, REGIONINFO); startFakeFailedOverMasterAssignmentManager(am, this.watcher); while (!am.processRITInvoked) Thread.sleep(1); // As part of the failover cleanup, the balancing region plan is removed. // So a random server will be used to open the region. 
For testing purpose, // let's assume it is going to open on server b: - am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_B)); + am.addPlan(REGIONINFO.getEncodedName(), new RegionPlan(REGIONINFO, null, SERVERNAME_LIVE)); Mocking.waitForRegionFailedToCloseAndSetToPendingClose(am, REGIONINFO); @@ -346,7 +357,7 @@ public class TestAssignmentManager { // region handler duplicated here because its down deep in a private // method hard to expose. int versionid = - ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); + ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_DEAD, -1); assertNotSame(versionid, -1); Mocking.waitForRegionPendingOpenInRIT(am, REGIONINFO.getEncodedName()); @@ -357,12 +368,12 @@ public class TestAssignmentManager { assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, - SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE, + SERVERNAME_LIVE, EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, versionid); assertNotSame(-1, versionid); // Move znode from OPENING to OPENED as RS does on successful open. versionid = ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, - SERVERNAME_B, versionid); + SERVERNAME_LIVE, versionid); assertNotSame(-1, versionid); // Block here until our znode is cleared or until this test timesout. ZKAssign.blockUntilNoRIT(watcher); @@ -410,9 +421,9 @@ public class TestAssignmentManager { this.watcher.registerListenerFirst(am); // Call the balance function but fake the region being online first at // SERVERNAME_A. Create a balance plan. - am.regionOnline(REGIONINFO, SERVERNAME_A); + am.regionOnline(REGIONINFO, SERVERNAME_DEAD); // Balance region from A to B. - RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_A, SERVERNAME_B); + RegionPlan plan = new RegionPlan(REGIONINFO, SERVERNAME_DEAD, SERVERNAME_LIVE); am.balance(plan); RegionStates regionStates = am.getRegionStates(); @@ -428,7 +439,7 @@ public class TestAssignmentManager { // region handler duplicated here because its down deep in a private // method hard to expose. int versionid = - ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_A, -1); + ZKAssign.transitionNodeClosed(this.watcher, REGIONINFO, SERVERNAME_DEAD, -1); assertNotSame(versionid, -1); // AM is going to notice above CLOSED and queue up a new assign. The // assign will go to open the region in the new location set by the @@ -442,12 +453,12 @@ public class TestAssignmentManager { assertNotSame(-1, versionid); // This uglyness below is what the openregionhandler on RS side does. versionid = ZKAssign.transitionNode(server.getZooKeeper(), REGIONINFO, - SERVERNAME_B, EventType.M_ZK_REGION_OFFLINE, + SERVERNAME_LIVE, EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, versionid); assertNotSame(-1, versionid); // Move znode from OPENING to OPENED as RS does on successful open. versionid = - ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_B, versionid); + ZKAssign.transitionNodeOpened(this.watcher, REGIONINFO, SERVERNAME_LIVE, versionid); assertNotSame(-1, versionid); // Wait on the handler removing the OPENED znode. while(regionStates.isRegionInTransition(REGIONINFO)) Threads.sleep(1); @@ -463,10 +474,12 @@ public class TestAssignmentManager { * Run a simple server shutdown handler. 
* @throws KeeperException * @throws IOException + * @throws InterruptedException */ @Test (timeout=180000) public void testShutdownHandler() - throws KeeperException, IOException, CoordinatedStateException, ServiceException { + throws KeeperException, IOException, CoordinatedStateException, ServiceException, + InterruptedException { // Create and startup an executor. This is used by AssignmentManager // handling zk callbacks. ExecutorService executor = startupMasterExecutor("testShutdownHandler"); @@ -492,26 +505,56 @@ public class TestAssignmentManager { * @throws KeeperException * @throws IOException * @throws ServiceException + * @throws InterruptedException */ @Test (timeout=180000) - public void testSSHWhenDisableTableInProgress() throws KeeperException, IOException, - CoordinatedStateException, ServiceException { + public void testSSHWhenDisablingTableInProgress() throws KeeperException, IOException, + CoordinatedStateException, ServiceException, InterruptedException { testCaseWithPartiallyDisabledState(Table.State.DISABLING); - testCaseWithPartiallyDisabledState(Table.State.DISABLED); } + /** + * To test closed region handler to remove rit and delete corresponding znode + * if region in pending close or closing while processing shutdown of a region + * server.(HBASE-5927). + * + * @throws KeeperException + * @throws IOException + * @throws ServiceException + * @throws InterruptedException + */ + @Test (timeout=180000) + public void testSSHWhenDisabledTableInProgress() throws KeeperException, IOException, + CoordinatedStateException, ServiceException, InterruptedException { + testCaseWithPartiallyDisabledState(Table.State.DISABLED); + } /** - * To test if the split region is removed from RIT if the region was in SPLITTING state but the RS - * has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also HBASE-5806 + * To test if the split region is removed from RIT if the region was in SPLITTING state but the + * RS has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also + * HBASE-5806 * * @throws KeeperException * @throws IOException */ @Test (timeout=180000) - public void testSSHWhenSplitRegionInProgress() throws KeeperException, IOException, Exception { + public void testSSHWhenSplitRegionInProgressTrue() + throws KeeperException, IOException, Exception { // true indicates the region is split but still in RIT testCaseWithSplitRegionPartial(true); + } + + /** + * To test if the split region is removed from RIT if the region was in SPLITTING state but the + * RS has actually completed the splitting in hbase:meta but went down. See HBASE-6070 and also + * HBASE-5806 + * + * @throws KeeperException + * @throws IOException + */ + @Test (timeout=180000) + public void testSSHWhenSplitRegionInProgressFalse() + throws KeeperException, IOException, Exception { // false indicate the region is not split testCaseWithSplitRegionPartial(false); } @@ -529,14 +572,13 @@ public class TestAssignmentManager { AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager( this.server, this.serverManager); // adding region to regions and servers maps. - am.regionOnline(REGIONINFO, SERVERNAME_A); - // adding region in pending close. 
- am.getRegionStates().updateRegionState( - REGIONINFO, State.SPLITTING, SERVERNAME_A); - am.getTableStateManager().setTableState(REGIONINFO.getTable(), - Table.State.ENABLED); - RegionTransition data = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING, - REGIONINFO.getRegionName(), SERVERNAME_A); + am.regionOnline(REGIONINFO, SERVERNAME_DEAD); + // Adding region in SPLITTING state. + am.getRegionStates().updateRegionState(REGIONINFO, State.SPLITTING, SERVERNAME_DEAD); + am.getTableStateManager().setTableState(REGIONINFO.getTable(), Table.State.ENABLED); + RegionTransition data = + RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_SPLITTING, + REGIONINFO.getRegionName(), SERVERNAME_DEAD); String node = ZKAssign.getNodeName(this.watcher, REGIONINFO.getEncodedName()); // create znode in M_ZK_REGION_CLOSING state. ZKUtil.createAndWatch(this.watcher, node, data.toByteArray()); @@ -566,7 +608,7 @@ public class TestAssignmentManager { } private void testCaseWithPartiallyDisabledState(Table.State state) throws KeeperException, - IOException, CoordinatedStateException, ServiceException { + IOException, CoordinatedStateException, ServiceException, InterruptedException { // Create and startup an executor. This is used by AssignmentManager // handling zk callbacks. ExecutorService executor = startupMasterExecutor("testSSHWhenDisableTableInProgress"); @@ -577,7 +619,7 @@ public class TestAssignmentManager { AssignmentManager am = new AssignmentManager(this.server, this.serverManager, balancer, executor, null, master.getTableLockManager()); // adding region to regions and servers maps. - am.regionOnline(REGIONINFO, SERVERNAME_A); + am.regionOnline(REGIONINFO, SERVERNAME_DEAD); // adding region in pending close. am.getRegionStates().updateRegionState(REGIONINFO, State.PENDING_CLOSE); if (state == Table.State.DISABLING) { @@ -588,7 +630,7 @@ public class TestAssignmentManager { Table.State.DISABLED); } RegionTransition data = RegionTransition.createRegionTransition(EventType.M_ZK_REGION_CLOSING, - REGIONINFO.getRegionName(), SERVERNAME_A); + REGIONINFO.getRegionName(), SERVERNAME_DEAD); // RegionTransitionData data = new // RegionTransitionData(EventType.M_ZK_REGION_CLOSING, // REGIONINFO.getRegionName(), SERVERNAME_A); @@ -618,7 +660,7 @@ public class TestAssignmentManager { } private void processServerShutdownHandler(AssignmentManager am, boolean splitRegion) - throws IOException, ServiceException { + throws IOException, ServiceException, InterruptedException { // Make sure our new AM gets callbacks; once registered, can't unregister. // Thats ok because we make a new zk watcher for each test. this.watcher.registerListenerFirst(am); @@ -627,13 +669,13 @@ public class TestAssignmentManager { // Make an RS Interface implementation. Make it so a scanner can go against it. 
ClientProtos.ClientService.BlockingInterface implementation = Mockito.mock(ClientProtos.ClientService.BlockingInterface.class); - // Get a meta row result that has region up on SERVERNAME_A + // Get a meta row result that has region up on SERVERNAME_DEAD Result r; if (splitRegion) { - r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_A); + r = MetaMockingUtil.getMetaTableRowResultAsSplitRegion(REGIONINFO, SERVERNAME_DEAD); } else { - r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_DEAD); } final ScanResponse.Builder builder = ScanResponse.newBuilder(); @@ -641,8 +683,7 @@ public class TestAssignmentManager { builder.addCellsPerResult(r.size()); final List cellScannables = new ArrayList(1); cellScannables.add(r); - Mockito.when(implementation.scan( - (RpcController)Mockito.any(), (ScanRequest)Mockito.any())). + Mockito.when(implementation.scan((RpcController)Mockito.any(), (ScanRequest)Mockito.any())). thenAnswer(new Answer() { @Override public ScanResponse answer(InvocationOnMock invocation) throws Throwable { @@ -658,7 +699,7 @@ public class TestAssignmentManager { // Get a connection w/ mocked up common methods. ClusterConnection connection = HConnectionTestingUtility.getMockedConnectionAndDecorate(HTU.getConfiguration(), - null, implementation, SERVERNAME_B, REGIONINFO); + null, implementation, SERVERNAME_LIVE, REGIONINFO); // These mocks were done up when all connections were managed. World is different now we // moved to unmanaged connections. It messes up the intercepts done in these tests. // Just mark connections as marked and then down in MetaTableAccessor, it will go the path @@ -670,26 +711,58 @@ public class TestAssignmentManager { // down in guts of server shutdown handler. Mockito.when(this.server.getConnection()).thenReturn(connection); - // Now make a server shutdown handler instance and invoke process. - // Have it that SERVERNAME_A died. + // Now make a server crash procedure instance and invoke it to process crashed SERVERNAME_A. + // Fake out system that SERVERNAME_A is down. DeadServer deadServers = new DeadServer(); - deadServers.add(SERVERNAME_A); - // I need a services instance that will return the AM + deadServers.add(SERVERNAME_DEAD); + Mockito.when(this.serverManager.getDeadServers()).thenReturn(deadServers); + final List liveServers = new ArrayList(1); + liveServers.add(SERVERNAME_LIVE); + Mockito.when(this.serverManager.createDestinationServersList()). + thenReturn(liveServers); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(false); + Mockito.when(this.serverManager.isServerReachable(SERVERNAME_DEAD)).thenReturn(false); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_LIVE)).thenReturn(true); + Mockito.when(this.serverManager.isServerReachable(SERVERNAME_LIVE)).thenReturn(true); + // Make it so we give right answers when log recovery get/set are called. 
MasterFileSystem fs = Mockito.mock(MasterFileSystem.class); Mockito.doNothing().when(fs).setLogRecoveryMode(); Mockito.when(fs.getLogRecoveryMode()).thenReturn(RecoveryMode.LOG_REPLAY); + // I need a services instance that will return the AM MasterServices services = Mockito.mock(MasterServices.class); Mockito.when(services.getAssignmentManager()).thenReturn(am); Mockito.when(services.getServerManager()).thenReturn(this.serverManager); Mockito.when(services.getZooKeeper()).thenReturn(this.watcher); Mockito.when(services.getMasterFileSystem()).thenReturn(fs); Mockito.when(services.getConnection()).thenReturn(connection); + MetaTableLocator mtl = Mockito.mock(MetaTableLocator.class); + Mockito.when(mtl.verifyMetaRegionLocation(Mockito.isA(HConnection.class), + Mockito.isA(ZooKeeperWatcher.class), Mockito.anyLong())). + thenReturn(true); + Mockito.when(mtl.isLocationAvailable(this.watcher)).thenReturn(true); + Mockito.when(services.getMetaTableLocator()).thenReturn(mtl); Configuration conf = server.getConfiguration(); Mockito.when(services.getConfiguration()).thenReturn(conf); - ServerShutdownHandler handler = new ServerShutdownHandler(this.server, - services, deadServers, SERVERNAME_A, false); + MasterProcedureEnv env = new MasterProcedureEnv(services); + ServerCrashProcedure procedure = new ServerCrashProcedure(SERVERNAME_DEAD, true, false); am.failoverCleanupDone.set(true); - handler.process(); + clearRITInBackground(am, REGIONINFO, SERVERNAME_LIVE); + Method protectedExecuteMethod = null; + try { + protectedExecuteMethod = + procedure.getClass().getSuperclass().getDeclaredMethod("execute", Object.class); + protectedExecuteMethod.setAccessible(true); + Procedure [] procedures = new Procedure [] {procedure}; + do { + // We know that ServerCrashProcedure does not return more than a single Procedure as + // result; it does not make children so the procedures[0] is safe. + procedures = (Procedure [])protectedExecuteMethod.invoke(procedures[0], env); + } while(procedures != null); + } catch (NoSuchMethodException | SecurityException | IllegalAccessException | + IllegalArgumentException | InvocationTargetException e) { + // TODO Auto-generated catch block + e.printStackTrace(); + } // The region in r will have been assigned. It'll be up in zk as unassigned. } finally { if (connection != null) connection.close(); @@ -697,6 +770,30 @@ public class TestAssignmentManager { } /** + * Start a background thread that will notice when a particular RIT arrives and that will then + * 'clear it' as though it had been successfully processed. + */ + private void clearRITInBackground(final AssignmentManager am, final HRegionInfo hri, + final ServerName sn) { + Thread t = new Thread() { + @Override + public void run() { + while (true) { + RegionState rs = am.getRegionStates().getRegionTransitionState(hri); + if (rs != null && rs.getServerName() != null) { + if (rs.getServerName().equals(sn)) { + am.regionOnline(REGIONINFO, sn); + break; + } + } + Threads.sleep(100); + } + } + }; + t.start(); + } + + /** * Create and startup executor pools. Start same set as master does (just * run a few less). * @param name Name to give our executor @@ -720,7 +817,7 @@ public class TestAssignmentManager { // First amend the servermanager mock so that when we do send close of the // first meta region on SERVERNAME_A, it will return true rather than // default null. 
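As an aside on the reflection loop added to processServerShutdownHandler above: it hand-steps the ServerCrashProcedure because Procedure.execute(Object) is protected and no ProcedureExecutor is running in this mocked-up test. A minimal sketch of that stepping pattern as a standalone helper (hypothetical, not part of this patch; like the loop above, it assumes the procedure never returns child procedures):

import java.lang.reflect.Method;

import org.apache.hadoop.hbase.procedure2.Procedure;

final class ProcedureStepper {
  /** Repeatedly invoke the protected execute(env) until the procedure reports nothing left to run. */
  static void runToCompletion(final Procedure proc, final Object env) throws Exception {
    Method execute = Procedure.class.getDeclaredMethod("execute", Object.class);
    execute.setAccessible(true);
    Procedure[] next = new Procedure[] { proc };
    do {
      // One call is one state of the procedure's flow; the result is what to run next, or null when done.
      next = (Procedure[]) execute.invoke(next[0], env);
    } while (next != null);
  }
}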
- Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_A, hri, -1)).thenReturn(true); + Mockito.when(this.serverManager.sendRegionClose(SERVERNAME_DEAD, hri, -1)).thenReturn(true); // Need a mocked catalog tracker. LoadBalancer balancer = LoadBalancerFactory.getLoadBalancer(server .getConfiguration()); @@ -729,20 +826,20 @@ public class TestAssignmentManager { this.serverManager, balancer, null, null, master.getTableLockManager()); try { // First make sure my mock up basically works. Unassign a region. - unassign(am, SERVERNAME_A, hri); + unassign(am, SERVERNAME_DEAD, hri); // This delete will fail if the previous unassign did wrong thing. - ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_A); + ZKAssign.deleteClosingNode(this.watcher, hri, SERVERNAME_DEAD); // Now put a SPLITTING region in the way. I don't have to assert it // go put in place. This method puts it in place then asserts it still // owns it by moving state from SPLITTING to SPLITTING. - int version = createNodeSplitting(this.watcher, hri, SERVERNAME_A); + int version = createNodeSplitting(this.watcher, hri, SERVERNAME_DEAD); // Now, retry the unassign with the SPLTTING in place. It should just // complete without fail; a sort of 'silent' recognition that the // region to unassign has been split and no longer exists: TOOD: what if // the split fails and the parent region comes back to life? - unassign(am, SERVERNAME_A, hri); + unassign(am, SERVERNAME_DEAD, hri); // This transition should fail if the znode has been messed with. - ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_A, + ZKAssign.transitionNode(this.watcher, hri, SERVERNAME_DEAD, EventType.RS_ZK_REGION_SPLITTING, EventType.RS_ZK_REGION_SPLITTING, version); assertFalse(am.getRegionStates().isRegionInTransition(hri)); } finally { @@ -806,23 +903,23 @@ public class TestAssignmentManager { if (balancer instanceof MockedLoadBalancer) { ((MockedLoadBalancer) balancer).setGateVariable(gate); } - ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A); + ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_DEAD); int v = ZKAssign.getVersion(this.watcher, REGIONINFO); - ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, + ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_DEAD, EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_FAILED_OPEN, v); String path = ZKAssign.getNodeName(this.watcher, REGIONINFO .getEncodedName()); am.getRegionStates().updateRegionState( - REGIONINFO, State.OPENING, SERVERNAME_A); + REGIONINFO, State.OPENING, SERVERNAME_DEAD); // a dummy plan inserted into the regionPlans. This plan is cleared and // new one is formed am.regionPlans.put(REGIONINFO.getEncodedName(), new RegionPlan( - REGIONINFO, null, SERVERNAME_A)); + REGIONINFO, null, SERVERNAME_DEAD)); RegionPlan regionPlan = am.regionPlans.get(REGIONINFO.getEncodedName()); List serverList = new ArrayList(2); - serverList.add(SERVERNAME_B); + serverList.add(SERVERNAME_LIVE); Mockito.when( - this.serverManager.createDestinationServersList(SERVERNAME_A)) + this.serverManager.createDestinationServersList(SERVERNAME_DEAD)) .thenReturn(serverList); am.nodeDataChanged(path); // here we are waiting until the random assignment in the load balancer is @@ -886,22 +983,24 @@ public class TestAssignmentManager { /** * Test the scenario when the master is in failover and trying to process a * region which is in Opening state on a dead RS. Master will force offline the - * region and put it in transition. AM relies on SSH to reassign it. 
+ * region and put it in transition. AM relies on ServerCrashProcedure to reassign it. */ @Test(timeout = 60000) public void testRegionInOpeningStateOnDeadRSWhileMasterFailover() throws IOException, KeeperException, ServiceException, CoordinatedStateException, InterruptedException { AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager( this.server, this.serverManager); - ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_A); + ZKAssign.createNodeOffline(this.watcher, REGIONINFO, SERVERNAME_DEAD); int version = ZKAssign.getVersion(this.watcher, REGIONINFO); - ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_A, EventType.M_ZK_REGION_OFFLINE, + ZKAssign.transitionNode(this.watcher, REGIONINFO, SERVERNAME_DEAD, + EventType.M_ZK_REGION_OFFLINE, EventType.RS_ZK_REGION_OPENING, version); RegionTransition rt = RegionTransition.createRegionTransition(EventType.RS_ZK_REGION_OPENING, - REGIONINFO.getRegionName(), SERVERNAME_A, HConstants.EMPTY_BYTE_ARRAY); + REGIONINFO.getRegionName(), SERVERNAME_DEAD, HConstants.EMPTY_BYTE_ARRAY); version = ZKAssign.getVersion(this.watcher, REGIONINFO); - Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(false); - am.getRegionStates().logSplit(SERVERNAME_A); // Assume log splitting is done + // This isServerOnlin is weird. It is just so the below processRegionsInTransition will walk + // the wanted code path. + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(false); am.getRegionStates().createRegionState(REGIONINFO); am.gate.set(false); @@ -922,8 +1021,8 @@ public class TestAssignmentManager { while (!am.gate.get()) { Thread.sleep(10); } - assertTrue("The region should be assigned immediately.", null != am.regionPlans.get(REGIONINFO - .getEncodedName())); + assertFalse("The region should be assigned immediately.", + am.getRegionStates().isRegionInTransition(REGIONINFO.getEncodedName())); am.shutdown(); } @@ -943,7 +1042,7 @@ public class TestAssignmentManager { Mockito.when(this.serverManager.getOnlineServers()).thenReturn( new HashMap(0)); List destServers = new ArrayList(1); - destServers.add(SERVERNAME_A); + destServers.add(SERVERNAME_DEAD); Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers); // To avoid cast exception in DisableTableHandler process. 
HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0); @@ -995,12 +1094,13 @@ public class TestAssignmentManager { * @throws Exception */ @Test (timeout=180000) - public void testMasterRestartWhenTableInEnabling() throws KeeperException, IOException, Exception { + public void testMasterRestartWhenTableInEnabling() + throws KeeperException, IOException, Exception { enabling = true; List destServers = new ArrayList(1); - destServers.add(SERVERNAME_A); + destServers.add(SERVERNAME_DEAD); Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers); - Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(true); HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0); CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager( HTU.getConfiguration()); @@ -1047,9 +1147,9 @@ public class TestAssignmentManager { public void testMasterRestartShouldRemoveStaleZnodesOfUnknownTableAsForMeta() throws Exception { List destServers = new ArrayList(1); - destServers.add(SERVERNAME_A); + destServers.add(SERVERNAME_DEAD); Mockito.when(this.serverManager.createDestinationServersList()).thenReturn(destServers); - Mockito.when(this.serverManager.isServerOnline(SERVERNAME_A)).thenReturn(true); + Mockito.when(this.serverManager.isServerOnline(SERVERNAME_DEAD)).thenReturn(true); HTU.getConfiguration().setInt(HConstants.MASTER_PORT, 0); CoordinatedStateManager csm = CoordinatedStateManagerFactory.getCoordinatedStateManager( HTU.getConfiguration()); @@ -1078,25 +1178,27 @@ public class TestAssignmentManager { } /** * When a region is in transition, if the region server opening the region goes down, - * the region assignment takes a long time normally (waiting for timeout monitor to trigger assign). - * This test is to make sure SSH reassigns it right away. + * the region assignment takes a long time normally (waiting for timeout monitor to trigger + * assign). This test is to make sure SSH reassigns it right away. + * @throws InterruptedException */ @Test (timeout=180000) public void testSSHTimesOutOpeningRegionTransition() - throws KeeperException, IOException, CoordinatedStateException, ServiceException { + throws KeeperException, IOException, CoordinatedStateException, ServiceException, + InterruptedException { // Create an AM. - AssignmentManagerWithExtrasForTesting am = + final AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server, this.serverManager); - // adding region in pending open. + // First set up region as being online on SERVERNAME_B. 
+ am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_LIVE); + // Now add region in pending open up in RIT RegionState state = new RegionState(REGIONINFO, - State.OPENING, System.currentTimeMillis(), SERVERNAME_A); - am.getRegionStates().regionOnline(REGIONINFO, SERVERNAME_B); + State.OPENING, System.currentTimeMillis(), SERVERNAME_DEAD); am.getRegionStates().regionsInTransition.put(REGIONINFO.getEncodedName(), state); - // adding region plan + // Add a region plan am.regionPlans.put(REGIONINFO.getEncodedName(), - new RegionPlan(REGIONINFO, SERVERNAME_B, SERVERNAME_A)); - am.getTableStateManager().setTableState(REGIONINFO.getTable(), - Table.State.ENABLED); + new RegionPlan(REGIONINFO, SERVERNAME_LIVE, SERVERNAME_DEAD)); + am.getTableStateManager().setTableState(REGIONINFO.getTable(), Table.State.ENABLED); try { am.assignInvoked = false; @@ -1113,8 +1215,8 @@ public class TestAssignmentManager { * Scenario:

*
* - master starts a close, and creates a znode
* - it fails just at this moment, before contacting the RS
- * - while the second master is coming up, the targeted RS dies. But it's before ZK timeout so
- *   we don't know, and we have an exception.
+ * - while the second master is coming up, the targeted RS dies. But it's before ZK timeout
+ *   so we don't know, and we have an exception.
* - the master must handle this nicely and reassign.
*
    */ @@ -1123,7 +1225,7 @@ public class TestAssignmentManager { AssignmentManagerWithExtrasForTesting am = setUpMockedAssignmentManager(this.server, this.serverManager); - ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_A); + ZKAssign.createNodeClosing(this.watcher, REGIONINFO, SERVERNAME_DEAD); try { am.getRegionStates().createRegionState(REGIONINFO); @@ -1207,7 +1309,7 @@ public class TestAssignmentManager { ClientProtos.ClientService.BlockingInterface ri = Mockito.mock(ClientProtos.ClientService.BlockingInterface.class); // Get a meta row result that has region up on SERVERNAME_A for REGIONINFO - Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_A); + Result r = MetaMockingUtil.getMetaTableRowResult(REGIONINFO, SERVERNAME_DEAD); final ScanResponse.Builder builder = ScanResponse.newBuilder(); builder.setMoreResults(true); builder.addCellsPerResult(r.size()); @@ -1240,7 +1342,7 @@ public class TestAssignmentManager { // Get a connection w/ mocked up common methods. ClusterConnection connection = (ClusterConnection)HConnectionTestingUtility. getMockedConnectionAndDecorate(HTU.getConfiguration(), null, - ri, SERVERNAME_B, REGIONINFO); + ri, SERVERNAME_LIVE, REGIONINFO); // These mocks were done up when all connections were managed. World is different now we // moved to unmanaged connections. It messes up the intercepts done in these tests. // Just mark connections as marked and then down in MetaTableAccessor, it will go the path @@ -1288,7 +1390,7 @@ public class TestAssignmentManager { public void assign(HRegionInfo region, boolean setOfflineInZK, boolean forceNewPlan) { if (enabling) { assignmentCount++; - this.regionOnline(region, SERVERNAME_A); + this.regionOnline(region, SERVERNAME_DEAD); } else { super.assign(region, setOfflineInZK, forceNewPlan); this.gate.set(true); @@ -1301,7 +1403,7 @@ public class TestAssignmentManager { if (enabling) { for (HRegionInfo region : regions) { assignmentCount++; - this.regionOnline(region, SERVERNAME_A); + this.regionOnline(region, SERVERNAME_DEAD); } return true; } @@ -1447,9 +1549,9 @@ public class TestAssignmentManager { this.watcher.registerListenerFirst(am); assertFalse("The region should not be in transition", am.getRegionStates().isRegionInTransition(hri)); - ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_A); + ZKAssign.createNodeOffline(this.watcher, hri, SERVERNAME_DEAD); // Trigger a transition event - ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_A); + ZKAssign.transitionNodeOpening(this.watcher, hri, SERVERNAME_DEAD); long startTime = EnvironmentEdgeManager.currentTime(); while (!zkEventProcessed.get()) { assertTrue("Timed out in waiting for ZK event to be processed", @@ -1475,7 +1577,7 @@ public class TestAssignmentManager { HRegionInfo hri = REGIONINFO; regionStates.createRegionState(hri); assertFalse(regionStates.isRegionInTransition(hri)); - RegionPlan plan = new RegionPlan(hri, SERVERNAME_A, SERVERNAME_B); + RegionPlan plan = new RegionPlan(hri, SERVERNAME_DEAD, SERVERNAME_LIVE); // Fake table is deleted regionStates.tableDeleted(hri.getTable()); am.balance(plan); @@ -1491,7 +1593,8 @@ public class TestAssignmentManager { @SuppressWarnings("unchecked") @Test (timeout=180000) public void testOpenCloseRegionRPCIntendedForPreviousServer() throws Exception { - Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_B), Mockito.eq(REGIONINFO), + Mockito.when(this.serverManager.sendRegionOpen(Mockito.eq(SERVERNAME_LIVE), + Mockito.eq(REGIONINFO), 
Mockito.anyInt(), (List)Mockito.any())) .thenThrow(new DoNotRetryIOException()); this.server.getConfiguration().setInt("hbase.assignment.maximum.attempts", 100); @@ -1505,12 +1608,12 @@ public class TestAssignmentManager { RegionStates regionStates = am.getRegionStates(); try { am.regionPlans.put(REGIONINFO.getEncodedName(), - new RegionPlan(REGIONINFO, null, SERVERNAME_B)); + new RegionPlan(REGIONINFO, null, SERVERNAME_LIVE)); // Should fail once, but succeed on the second attempt for the SERVERNAME_A am.assign(hri, true, false); } finally { - assertEquals(SERVERNAME_A, regionStates.getRegionState(REGIONINFO).getServerName()); + assertEquals(SERVERNAME_DEAD, regionStates.getRegionState(REGIONINFO).getServerName()); am.shutdown(); } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java index e892ce7..b682233 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestAssignmentManagerOnCluster.java @@ -218,7 +218,7 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.deleteTable(Bytes.toBytes(table)); } } - + // Simulate a scenario where the AssignCallable and SSH are trying to assign a region @Test (timeout=60000) public void testAssignRegionBySSH() throws Exception { @@ -248,15 +248,15 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.getHBaseCluster().killRegionServer(controlledServer); TEST_UTIL.getHBaseCluster().waitForRegionServerToStop(controlledServer, -1); AssignmentManager am = master.getAssignmentManager(); - + // Simulate the AssignCallable trying to assign the region. 
Have the region in OFFLINE state, - // but not in transition and the server is the dead 'controlledServer' + // but not in transition and the server is the dead 'controlledServer' regionStates.createRegionState(hri, State.OFFLINE, controlledServer, null); am.assign(hri, true, true); // Region should remain OFFLINE and go to transition assertEquals(State.OFFLINE, regionStates.getRegionState(hri).getState()); assertTrue (regionStates.isRegionInTransition(hri)); - + master.enableSSH(true); am.waitForAssignment(hri); assertTrue (regionStates.getRegionState(hri).isOpened()); @@ -336,7 +336,7 @@ public class TestAssignmentManagerOnCluster { TEST_UTIL.getMiniHBaseCluster().stopMaster(masterServerName); TEST_UTIL.getMiniHBaseCluster().startMaster(); // Wait till master is active and is initialized - while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null || + while (TEST_UTIL.getMiniHBaseCluster().getMaster() == null || !TEST_UTIL.getMiniHBaseCluster().getMaster().isInitialized()) { Threads.sleep(1); } @@ -724,7 +724,7 @@ public class TestAssignmentManagerOnCluster { } am.getTableStateManager().setTableState(table, ZooKeeperProtos.Table.State.DISABLING); - List toAssignRegions = am.processServerShutdown(destServerName); + List toAssignRegions = am.cleanOutCrashedServerReferences(destServerName); assertTrue("Regions to be assigned should be empty.", toAssignRegions.isEmpty()); assertTrue("Regions to be assigned should be empty.", am.getRegionStates() .getRegionState(hri).isOffline()); @@ -847,7 +847,7 @@ public class TestAssignmentManagerOnCluster { List regions = new ArrayList(); regions.add(hri); am.assign(destServerName, regions); - + // let region open continue MyRegionObserver.postOpenEnabled.set(false); @@ -1324,8 +1324,8 @@ public class TestAssignmentManagerOnCluster { } @Override - public boolean isServerShutdownHandlerEnabled() { - return enabled.get() && super.isServerShutdownHandlerEnabled(); + public boolean isServerCrashProcessingEnabled() { + return enabled.get() && super.isServerCrashProcessingEnabled(); } public void enableSSH(boolean enabled) { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java index dea5c3a..053dc99 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestCatalogJanitor.java @@ -364,7 +364,7 @@ public class TestCatalogJanitor { } @Override - public boolean isServerShutdownHandlerEnabled() { + public boolean isServerCrashProcessingEnabled() { return true; } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java index 38e2613..83dd123 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestDistributedLogSplitting.java @@ -1727,5 +1727,4 @@ public class TestDistributedLogSplitting { return hrs; } - -} +} \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java index 33fe65a..edd7b2d 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java +++ 
b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestMasterFailover.java @@ -74,6 +74,7 @@ import org.apache.hadoop.hbase.zookeeper.ZKAssign; import org.apache.hadoop.hbase.zookeeper.ZKTableStateManager; import org.apache.hadoop.hbase.zookeeper.ZooKeeperWatcher; import org.apache.zookeeper.data.Stat; +import org.junit.Ignore; import org.junit.Test; import org.junit.experimental.categories.Category; @@ -761,8 +762,7 @@ public class TestMasterFailover { } Thread.sleep(100); } - LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + - region + "\n\n"); + LOG.debug("\n\nRegion of enabled table was OPENED on dead RS\n" + region + "\n\n"); // Region of disabled table was opened on dead RS region = disabledRegions.remove(0); @@ -778,8 +778,7 @@ public class TestMasterFailover { } Thread.sleep(100); } - LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + - region + "\n\n"); + LOG.debug("\n\nRegion of disabled table was OPENED on dead RS\n" + region + "\n\n"); /* * ZK = NONE diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKLessAMOnCluster.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKLessAMOnCluster.java index 3d13d54..0e49f1c 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKLessAMOnCluster.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/TestZKLessAMOnCluster.java @@ -39,4 +39,4 @@ public class TestZKLessAMOnCluster extends TestAssignmentManagerOnCluster { public static void tearDownAfterClass() throws Exception { TestAssignmentManagerOnCluster.tearDownAfterClass(); } -} +} \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java index 6be34b6..00f82f4 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/MasterProcedureTestingUtility.java @@ -192,15 +192,26 @@ public class MasterProcedureTestingUtility { assertTrue(tsm.isTableState(tableName, ZooKeeperProtos.Table.State.DISABLED)); } + /** + * Run through all procedure flow states TWICE while also restarting procedure executor at each + * step; i.e force a reread of procedure store. + * + *

It does
+ * 1. Execute step N - kill the executor before store update
+ * 2. Restart executor/store
+ * 3. Execute step N - and then save to store
+ *
+ *

    This is a good test for finding state that needs persisting and steps that are not + * idempotent. Use this version of the test when a procedure executes all flow steps from start to + * finish. + * @see #testRecoveryAndDoubleExecution(ProcedureExecutor, long) + */ public static void testRecoveryAndDoubleExecution( final ProcedureExecutor procExec, final long procId, final int numSteps, final TState[] states) throws Exception { ProcedureTestingUtility.waitProcedure(procExec, procId); assertEquals(false, procExec.isRunning()); - // Restart the executor and execute the step twice - // execute step N - kill before store update - // restart executor/store - // execute step N - save on store for (int i = 0; i < numSteps; ++i) { LOG.info("Restart "+ i +" exec state: " + states[i]); ProcedureTestingUtility.assertProcNotYetCompleted(procExec, procId); @@ -211,6 +222,35 @@ public class MasterProcedureTestingUtility { ProcedureTestingUtility.assertProcNotFailed(procExec, procId); } + /** + * Run through all procedure flow states TWICE while also restarting procedure executor at each + * step; i.e force a reread of procedure store. + * + *

It does
+ * 1. Execute step N - kill the executor before store update
+ * 2. Restart executor/store
+ * 3. Execute step N - and then save to store
+ *
+ *

    This is a good test for finding state that needs persisting and steps that are not + * idempotent. Use this version of the test when the order in which flow steps are executed is + * not start to finish; where the procedure may vary the flow steps dependent on circumstance + * found. + * @see #testRecoveryAndDoubleExecution(ProcedureExecutor, long, int, Object[]) + */ + public static void testRecoveryAndDoubleExecution( + final ProcedureExecutor procExec, final long procId) + throws Exception { + ProcedureTestingUtility.waitProcedure(procExec, procId); + assertEquals(false, procExec.isRunning()); + while (!procExec.isFinished(procId)) { + ProcedureTestingUtility.restart(procExec); + ProcedureTestingUtility.waitProcedure(procExec, procId); + } + assertEquals(true, procExec.isRunning()); + ProcedureTestingUtility.assertProcNotFailed(procExec, procId); + } + public static void testRollbackAndDoubleExecution( final ProcedureExecutor procExec, final long procId, final int lastStep, final TState[] states) throws Exception { diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterProcedureQueue.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterProcedureQueue.java index 0d00ff2..349fa8e 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterProcedureQueue.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestMasterProcedureQueue.java @@ -18,14 +18,18 @@ package org.apache.hadoop.hbase.master.procedure; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; + import java.io.IOException; import java.io.InputStream; import java.io.OutputStream; -import java.util.concurrent.atomic.AtomicInteger; -import java.util.concurrent.ConcurrentHashMap; import java.util.ArrayList; import java.util.HashSet; import java.util.Map; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.atomic.AtomicInteger; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -35,18 +39,11 @@ import org.apache.hadoop.hbase.TableName; import org.apache.hadoop.hbase.master.TableLockManager; import org.apache.hadoop.hbase.procedure2.Procedure; import org.apache.hadoop.hbase.testclassification.SmallTests; - import org.junit.After; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; import org.junit.experimental.categories.Category; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertFalse; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - @Category(SmallTests.class) public class TestMasterProcedureQueue { private static final Log LOG = LogFactory.getLog(TestMasterProcedureQueue.class); @@ -118,12 +115,12 @@ public class TestMasterProcedureQueue { // fetch item and take a lock assertEquals(1, queue.poll().longValue()); // take the xlock - assertTrue(queue.tryAcquireTableWrite(tableName, "write")); + assertTrue(queue.tryAcquireTableExclusiveLock(tableName, "write")); // table can't be deleted because we have the lock assertEquals(0, queue.size()); assertFalse(queue.markTableAsDeleted(tableName)); // release the xlock - queue.releaseTableWrite(tableName); + queue.releaseTableExclusiveLock(tableName); // complete the table deletion assertTrue(queue.markTableAsDeleted(tableName)); } @@ -149,7 +146,7 @@ public class TestMasterProcedureQueue { // fetch item and take a lock 
assertEquals(i, queue.poll().longValue()); // take the rlock - assertTrue(queue.tryAcquireTableRead(tableName, "read " + i)); + assertTrue(queue.tryAcquireTableSharedLock(tableName, "read " + i)); // table can't be deleted because we have locks and/or items in the queue assertFalse(queue.markTableAsDeleted(tableName)); } @@ -158,7 +155,7 @@ public class TestMasterProcedureQueue { // table can't be deleted because we have locks assertFalse(queue.markTableAsDeleted(tableName)); // release the rlock - queue.releaseTableRead(tableName); + queue.releaseTableSharedLock(tableName); } // there are no items and no lock in the queeu @@ -187,47 +184,47 @@ public class TestMasterProcedureQueue { // Fetch the 1st item and take the write lock Long procId = queue.poll(); assertEquals(1, procId.longValue()); - assertEquals(true, queue.tryAcquireTableWrite(tableName, "write " + procId)); + assertEquals(true, queue.tryAcquireTableExclusiveLock(tableName, "write " + procId)); // Fetch the 2nd item and verify that the lock can't be acquired assertEquals(null, queue.poll()); // Release the write lock and acquire the read lock - queue.releaseTableWrite(tableName); + queue.releaseTableExclusiveLock(tableName); // Fetch the 2nd item and take the read lock procId = queue.poll(); assertEquals(2, procId.longValue()); - assertEquals(true, queue.tryAcquireTableRead(tableName, "read " + procId)); + assertEquals(true, queue.tryAcquireTableSharedLock(tableName, "read " + procId)); // Fetch the 3rd item and verify that the lock can't be acquired procId = queue.poll(); assertEquals(3, procId.longValue()); - assertEquals(false, queue.tryAcquireTableWrite(tableName, "write " + procId)); + assertEquals(false, queue.tryAcquireTableExclusiveLock(tableName, "write " + procId)); // release the rdlock of item 2 and take the wrlock for the 3d item - queue.releaseTableRead(tableName); - assertEquals(true, queue.tryAcquireTableWrite(tableName, "write " + procId)); + queue.releaseTableSharedLock(tableName); + assertEquals(true, queue.tryAcquireTableExclusiveLock(tableName, "write " + procId)); // Fetch 4th item and verify that the lock can't be acquired assertEquals(null, queue.poll()); // Release the write lock and acquire the read lock - queue.releaseTableWrite(tableName); + queue.releaseTableExclusiveLock(tableName); // Fetch the 4th item and take the read lock procId = queue.poll(); assertEquals(4, procId.longValue()); - assertEquals(true, queue.tryAcquireTableRead(tableName, "read " + procId)); + assertEquals(true, queue.tryAcquireTableSharedLock(tableName, "read " + procId)); // Fetch the 4th item and take the read lock procId = queue.poll(); assertEquals(5, procId.longValue()); - assertEquals(true, queue.tryAcquireTableRead(tableName, "read " + procId)); + assertEquals(true, queue.tryAcquireTableSharedLock(tableName, "read " + procId)); // Release 4th and 5th read-lock - queue.releaseTableRead(tableName); - queue.releaseTableRead(tableName); + queue.releaseTableSharedLock(tableName); + queue.releaseTableSharedLock(tableName); // remove table queue assertEquals(0, queue.size()); @@ -353,11 +350,11 @@ public class TestMasterProcedureQueue { case CREATE: case DELETE: case EDIT: - avail = queue.tryAcquireTableWrite(proc.getTableName(), + avail = queue.tryAcquireTableExclusiveLock(proc.getTableName(), "op="+ proc.getTableOperationType()); break; case READ: - avail = queue.tryAcquireTableRead(proc.getTableName(), + avail = queue.tryAcquireTableSharedLock(proc.getTableName(), "op="+ proc.getTableOperationType()); break; } @@ -374,10 
+371,10 @@ public class TestMasterProcedureQueue { case CREATE: case DELETE: case EDIT: - queue.releaseTableWrite(proc.getTableName()); + queue.releaseTableExclusiveLock(proc.getTableName()); break; case READ: - queue.releaseTableRead(proc.getTableName()); + queue.releaseTableSharedLock(proc.getTableName()); break; } } diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java new file mode 100644 index 0000000..97512ce --- /dev/null +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/master/procedure/TestServerCrashProcedure.java @@ -0,0 +1,131 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hbase.master.procedure; + +import static org.junit.Assert.assertEquals; + +import java.io.IOException; +import java.util.Arrays; +import java.util.Collection; + +import org.apache.hadoop.hbase.HBaseTestingUtility; +import org.apache.hadoop.hbase.TableName; +import org.apache.hadoop.hbase.client.ResultScanner; +import org.apache.hadoop.hbase.client.Scan; +import org.apache.hadoop.hbase.client.Table; +import org.apache.hadoop.hbase.master.HMaster; +import org.apache.hadoop.hbase.procedure2.ProcedureExecutor; +import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility; +import org.apache.hadoop.hbase.regionserver.HRegionServer; +import org.apache.hadoop.hbase.testclassification.LargeTests; +import org.apache.hadoop.hbase.util.Threads; +import org.junit.After; +import org.junit.Before; +import org.junit.Test; +import org.junit.experimental.categories.Category; +import org.junit.runner.RunWith; +import org.junit.runners.Parameterized; +import org.junit.runners.Parameterized.Parameters; + +/** + * Runs first with DLS and then with DLR. + */ +@Category(LargeTests.class) +@RunWith(Parameterized.class) +public class TestServerCrashProcedure { + // Ugly junit parameterization. I just want to pass false and then true but seems like needs + // to return sequences of two-element arrays. 
+ @Parameters(name = "{index}: setting={0}") + public static Collection data() { + return Arrays.asList(new Object[] [] {{Boolean.FALSE, -1}, {Boolean.TRUE, -1}}); + } + + private final HBaseTestingUtility util = new HBaseTestingUtility(); + + @Before + public void setup() throws Exception { + this.util.startMiniCluster(3); + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( + this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false); + } + + @After + public void tearDown() throws Exception { + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate( + this.util.getHBaseCluster().getMaster().getMasterProcedureExecutor(), false); + this.util.shutdownMiniCluster(); + } + + public TestServerCrashProcedure(final Boolean b, final int ignore) { + this.util.getConfiguration().setBoolean("hbase.master.distributed.log.replay", b); + this.util.getConfiguration().setInt(MasterProcedureConstants.MASTER_PROCEDURE_THREADS, 1); + } + + /** + * Run server crash procedure steps twice to test idempotency and that we are persisting all + * needed state. + * @throws Exception + */ + @Test(timeout = 300000) + public void testRecoveryAndDoubleExecutionOnline() throws Exception { + final TableName tableName = TableName.valueOf("testRecoveryAndDoubleExecutionOnline"); + this.util.createTable(tableName, HBaseTestingUtility.COLUMNS, + HBaseTestingUtility.KEYS_FOR_HBA_CREATE_TABLE); + try (Table t = this.util.getConnection().getTable(tableName)) { + // Load the table with a bit of data so some logs to split and some edits in each region. + this.util.loadTable(t, HBaseTestingUtility.COLUMNS[0]); + int count = countRows(t); + // Run the procedure executor outside the master so we can mess with it. Need to disable + // Master's running of the server crash processing. + HMaster master = this.util.getHBaseCluster().getMaster(); + final ProcedureExecutor procExec = master.getMasterProcedureExecutor(); + master.setServerCrashProcessingEnabled(false); + // Kill a server. Master will notice but do nothing other than add it to list of dead servers. + HRegionServer hrs = this.util.getHBaseCluster().getRegionServer(0); + boolean carryingMeta = master.getAssignmentManager().isCarryingMeta(hrs.getServerName()); + this.util.getHBaseCluster().killRegionServer(hrs.getServerName()); + hrs.join(); + // Wait until the expiration of the server has arrived at the master. We won't process it + // by queuing a ServerCrashProcedure because we have disabled crash processing... but wait + // here so ServerManager gets notice and adds expired server to appropriate queues. + while (!master.getServerManager().isServerDead(hrs.getServerName())) Threads.sleep(10); + // Now, reenable processing else we can't get a lock on the ServerCrashProcedure. + master.setServerCrashProcessingEnabled(true); + // Do some of the master processing of dead servers so when SCP runs, it has expected 'state'. + master.getServerManager().moveFromOnelineToDeadServers(hrs.getServerName()); + // Enable test flags and then queue the crash procedure. + ProcedureTestingUtility.waitNoProcedureRunning(procExec); + ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true); + long procId = + procExec.submitProcedure(new ServerCrashProcedure(hrs.getServerName(), true, carryingMeta)); + // Now run through the procedure twice crashing the executor on each step... + MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId); + // Assert all data came back. 
+ assertEquals(count, countRows(t)); + } + } + + int countRows(final Table t) throws IOException { + int count = 0; + try (ResultScanner scanner = t.getScanner(new Scan())) { + while(scanner.next() != null) count++; + } + return count; + } +} \ No newline at end of file diff --git a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotClientRetries.java b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotClientRetries.java index 5168b85..8ebeb97 100644 --- a/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotClientRetries.java +++ b/hbase-server/src/test/java/org/apache/hadoop/hbase/snapshot/TestSnapshotClientRetries.java @@ -18,6 +18,8 @@ */ package org.apache.hadoop.hbase.snapshot; +import static org.junit.Assert.assertEquals; + import java.io.IOException; import java.util.concurrent.atomic.AtomicInteger; @@ -31,21 +33,14 @@ import org.apache.hadoop.hbase.coprocessor.CoprocessorHost; import org.apache.hadoop.hbase.coprocessor.MasterCoprocessorEnvironment; import org.apache.hadoop.hbase.coprocessor.ObserverContext; import org.apache.hadoop.hbase.protobuf.generated.HBaseProtos.SnapshotDescription; -import org.apache.hadoop.hbase.snapshot.SnapshotExistsException; -import org.apache.hadoop.hbase.snapshot.SnapshotDoesNotExistException; import org.apache.hadoop.hbase.testclassification.MediumTests; import org.apache.hadoop.hbase.util.TestTableName; - import org.junit.After; import org.junit.Before; import org.junit.Rule; import org.junit.Test; import org.junit.experimental.categories.Category; -import static org.junit.Assert.assertEquals; -import static org.junit.Assert.assertTrue; -import static org.junit.Assert.fail; - @Category({ MediumTests.class }) public class TestSnapshotClientRetries { private static final HBaseTestingUtility TEST_UTIL = new HBaseTestingUtility();
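For orientation, the recovery/double-execution pattern that the new TestServerCrashProcedure exercises above can be condensed as follows. This is only a sketch built from the APIs the patch itself uses (ProcedureTestingUtility, MasterProcedureTestingUtility, ServerCrashProcedure); the master, the dead server name and the carryingMeta flag are assumed to come from a running mini-cluster exactly as in the test:

import org.apache.hadoop.hbase.ServerName;
import org.apache.hadoop.hbase.master.HMaster;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureEnv;
import org.apache.hadoop.hbase.master.procedure.MasterProcedureTestingUtility;
import org.apache.hadoop.hbase.master.procedure.ServerCrashProcedure;
import org.apache.hadoop.hbase.procedure2.ProcedureExecutor;
import org.apache.hadoop.hbase.procedure2.ProcedureTestingUtility;

final class ServerCrashProcedureSketch {
  static void crashAndReplay(final HMaster master, final ServerName deadServer,
      final boolean carryingMeta) throws Exception {
    ProcedureExecutor<MasterProcedureEnv> procExec = master.getMasterProcedureExecutor();
    // Make every step "crash" the executor before it persists its state update...
    ProcedureTestingUtility.waitNoProcedureRunning(procExec);
    ProcedureTestingUtility.setKillAndToggleBeforeStoreUpdate(procExec, true);
    // ...then queue crash handling as a procedure rather than a ServerShutdownHandler.
    long procId =
        procExec.submitProcedure(new ServerCrashProcedure(deadServer, true, carryingMeta));
    // Drive the procedure through restart/re-execute cycles until it completes.
    MasterProcedureTestingUtility.testRecoveryAndDoubleExecution(procExec, procId);
  }
}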