Index: system_test/system_test_env.py
===================================================================
--- system_test/system_test_env.py (revision 1396332)
+++ system_test/system_test_env.py (working copy)
@@ -20,10 +20,13 @@
 # system_test_env.py
 # ===================================
 
+import copy
 import json
 import os
 import sys
 
+from utils import system_test_utils
+
 class SystemTestEnv():
 
     # private:
@@ -41,23 +44,84 @@
     CLUSTER_CONFIG_PATHNAME  = os.path.abspath(SYSTEM_TEST_BASE_DIR + "/" + CLUSTER_CONFIG_FILENAME)
     METRICS_FILENAME         = "metrics.json"
     METRICS_PATHNAME         = os.path.abspath(SYSTEM_TEST_BASE_DIR + "/" + METRICS_FILENAME)
+    TESTCASE_TO_RUN_FILENAME  = "testcase_to_run.json"
+    TESTCASE_TO_RUN_PATHNAME  = os.path.abspath(SYSTEM_TEST_BASE_DIR + "/" + TESTCASE_TO_RUN_FILENAME)
+    TESTCASE_TO_SKIP_FILENAME = "testcase_to_skip.json"
+    TESTCASE_TO_SKIP_PATHNAME = os.path.abspath(SYSTEM_TEST_BASE_DIR + "/" + TESTCASE_TO_SKIP_FILENAME)
 
-    clusterEntityConfigDictList = []
+    clusterEntityConfigDictList                     = []   # cluster entity config for current level
+    clusterEntityConfigDictListInSystemTestLevel    = []   # cluster entity config defined in system level
+    clusterEntityConfigDictListLastFoundInTestSuite = []   # cluster entity config last found in testsuite level
+    clusterEntityConfigDictListLastFoundInTestCase  = []   # cluster entity config last found in testcase level
+
+    systemTestResultsList  = []
+    testCaseToRunListDict  = {}
+    testCaseToSkipListDict = {}
+
+    printTestDescriptionsOnly = False
+    doNotValidateRemoteHost   = False
+
     def __init__(self):
         "Create an object with this system test session environment"
 
-        # retrieve each entity's data from cluster config json file
-        # as "dict" and enter them into a "list"
-        jsonFileContent = open(self.CLUSTER_CONFIG_PATHNAME, "r").read()
-        jsonData        = json.loads(jsonFileContent)
-        for key, cfgList in jsonData.items():
-            if key == "cluster_config":
-                for cfg in cfgList:
-                    self.clusterEntityConfigDictList.append(cfg)
+        # load the system level cluster config
+        system_test_utils.load_cluster_config(self.CLUSTER_CONFIG_PATHNAME, self.clusterEntityConfigDictList)
 
+        # save the system level cluster config
+        self.clusterEntityConfigDictListInSystemTestLevel = copy.deepcopy(self.clusterEntityConfigDictList)
 
+        # retrieve testcases to run from testcase_to_run.json
+        try:
+            testcaseToRunFileContent = open(self.TESTCASE_TO_RUN_PATHNAME, "r").read()
+            testcaseToRunData        = json.loads(testcaseToRunFileContent)
+            for testClassName, caseList in testcaseToRunData.items():
+                self.testCaseToRunListDict[testClassName] = caseList
+        except (IOError, ValueError):
+            # missing or malformed testcase_to_run.json => leave the run list empty
+            pass
 
+        # retrieve testcases to skip from testcase_to_skip.json
+        try:
+            testcaseToSkipFileContent = open(self.TESTCASE_TO_SKIP_PATHNAME, "r").read()
+            testcaseToSkipData        = json.loads(testcaseToSkipFileContent)
+            for testClassName, caseList in testcaseToSkipData.items():
+                self.testCaseToSkipListDict[testClassName] = caseList
+        except (IOError, ValueError):
+            # missing or malformed testcase_to_skip.json => skip nothing
+            pass
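+    # ----------------------------------------------------------------------
+    # The schema of testcase_to_run.json / testcase_to_skip.json is not part
+    # of this patch; inferred from the parsing code in __init__ above, a
+    # minimal testcase_to_run.json would map a test class name to a list of
+    # testcase directory names, e.g.:
+    #
+    #     { "ReplicaBasicTest": ["testcase_0001", "testcase_0113"] }
+    #
+    # testcase_to_skip.json uses the same layout; isTestCaseToSkip below
+    # consults it only when the run list for that class is empty.
+    # ----------------------------------------------------------------------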
+    def isTestCaseToSkip(self, testClassName, testcaseDirName):
+        testCaseToRunList  = []
+        testCaseToSkipList = []
+
+        try:
+            testCaseToRunList = self.testCaseToRunListDict[testClassName]
+        except KeyError:
+            # no 'testClassName' found => no need to run any cases for this test class
+            return True
+
+        try:
+            testCaseToSkipList = self.testCaseToSkipListDict[testClassName]
+        except KeyError:
+            pass
+
+        # if testCaseToRunList has elements, it takes precedence:
+        if len(testCaseToRunList) > 0:
+            if testcaseDirName not in testCaseToRunList:
+                return True
+        elif len(testCaseToSkipList) > 0:
+            if testcaseDirName in testCaseToSkipList:
+                return True
+
+        return False
+
+
     def getSystemTestEnvDict(self):
         envDict = {}
         envDict["system_test_base_dir"] = self.SYSTEM_TEST_BASE_DIR
Index: system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json
===================================================================
--- system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json (revision 0)
+++ system_test/replication_testsuite/testcase_0113/testcase_0113_properties.json (revision 0)
@@ -0,0 +1,79 @@
+{
+  "description": {"01":"Multi Leader Failures in Replication : 1. acks => 1",
+                  "02":"Produce and consume messages to a single topic - three partitions.",
+                  "03":"This test sends messages to 3 replicas",
+                  "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)",
+                  "05":"Restart the terminated broker",
+                  "06":"Look up brokers' log4j messages and verify that leader is re-elected successfully",
+                  "07":"At the end it verifies the log size and contents",
+                  "08":"Use a consumer to verify no message loss.",
+                  "09":"Producer dimensions : mode:sync, acks:1, comp:0",
+                  "10":"Log segment size : 102400"
+  },
+  "testcase_args": {
+    "bounce_leader": "true",
+    "replica_factor": "3",
+    "num_partition": "3",
+    "num_iteration": "3",
+    "sleep_seconds_between_producer_calls": "1",
+    "message_producing_free_time_sec": "15",
+    "num_messages_to_produce_per_producer_call": "50"
+  },
+  "entities": [
+    {
+      "entity_id": "0",
+      "clientPort": "2188",
+      "dataDir": "/tmp/zookeeper_0",
+      "log_filename": "zookeeper_2188.log",
+      "config_filename": "zookeeper_2188.properties"
+    },
+    {
+      "entity_id": "1",
+      "port": "9091",
+      "brokerid": "1",
+      "log.file.size": "102400",
+      "log.dir": "/tmp/kafka_server_1_logs",
+      "log_filename": "kafka_server_9091.log",
+      "config_filename": "kafka_server_9091.properties"
+    },
+    {
+      "entity_id": "2",
+      "port": "9092",
+      "brokerid": "2",
+      "log.file.size": "102400",
+      "log.dir": "/tmp/kafka_server_2_logs",
+      "log_filename": "kafka_server_9092.log",
+      "config_filename": "kafka_server_9092.properties"
+    },
+    {
+      "entity_id": "3",
+      "port": "9093",
+      "brokerid": "3",
+      "log.file.size": "102400",
+      "log.dir": "/tmp/kafka_server_3_logs",
+      "log_filename": "kafka_server_9093.log",
+      "config_filename": "kafka_server_9093.properties"
+    },
+    {
+      "entity_id": "4",
+      "topic": "test_1",
+      "threads": "5",
+      "compression-codec": "0",
+      "message-size": "500",
+      "message": "100",
+      "request-num-acks": "1",
+      "async":"false",
+      "log_filename": "producer_performance.log",
+      "config_filename": "producer_performance.properties"
+    },
+    {
+      "entity_id": "5",
+      "topic": "test_1",
+      "groupid": "mytestgroup",
+      "consumer-timeout-ms": "10000",
+      "zookeeper": "localhost:2188",
+      "log_filename": "console_consumer.log",
+      "config_filename": "console_consumer.properties"
+    }
+  ]
+}
Index: system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json
===================================================================
--- system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json (revision 0)
+++ 
system_test/replication_testsuite/testcase_0114/testcase_0114_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : 1. mode => async; 2. acks => 1", + "02":"Produce and consume messages to a single topic - three partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:0", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0115/testcase_0115_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : 1. 
comp => 1", + "02":"Produce and consume messages to a single topic - three partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:1", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0116/testcase_0116_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : 1. mode => async; 2. 
comp => 1", + "02":"Produce and consume messages to a single topic - three partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:-1, comp:1", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0117/testcase_0117_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : 1. acks => 1; 2. 
comp => 1", + "02":"Produce and consume messages to a single topic - three partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:1", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0118/testcase_0118_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : 1. mode => async; 2. acks => 1; 3. 
comp => 1", + "02":"Produce and consume messages to a single topic - three partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/replica_basic_test.py =================================================================== --- system_test/replication_testsuite/replica_basic_test.py (revision 1396332) +++ system_test/replication_testsuite/replica_basic_test.py (working copy) @@ -31,7 +31,9 @@ from system_test_env import SystemTestEnv sys.path.append(SystemTestEnv.SYSTEM_TEST_UTIL_DIR) + from setup_utils import SetupUtils +from replication_utils import ReplicationUtils import system_test_utils from testcase_env import TestcaseEnv @@ -39,12 +41,10 @@ import kafka_system_test_utils import metrics -class ReplicaBasicTest(SetupUtils): +class ReplicaBasicTest(ReplicationUtils, SetupUtils): testModuleAbsPathName = os.path.realpath(__file__) testSuiteAbsPathName = os.path.abspath(os.path.dirname(testModuleAbsPathName)) - isLeaderLogPattern = "Completed the leader state transition" - brokerShutDownCompletedPattern = "shut down completed" def __init__(self, systemTestEnv): @@ -54,20 +54,24 @@ # "clusterEntityConfigDictList" self.systemTestEnv = systemTestEnv + super(ReplicaBasicTest, self).__init__(self) + # dict to pass user-defined attributes to logger argument: "extra" d = {'name_of_class': self.__class__.__name__} def signal_handler(self, signal, frame): 
self.log_message("Interrupt detected - User pressed Ctrl+c") - for entityId, parentPid in self.testcaseEnv.entityParentPidDict.items(): - kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, self.testcaseEnv, entityId, parentPid) - + # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific + self.log_message("stopping all entities - please wait ...") + kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv) sys.exit(1) def runTest(self): + # ====================================================================== # get all testcase directories under this testsuite + # ====================================================================== testCasePathNameList = system_test_utils.get_dir_paths_with_prefix( self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX) testCasePathNameList.sort() @@ -76,75 +80,48 @@ # launch each testcase one by one: testcase_1, testcase_2, ... # ============================================================= for testCasePathName in testCasePathNameList: - + + skipThisTestCase = False + try: - # create a new instance of TestcaseEnv to keep track of this testcase's environment variables + # ====================================================================== + # A new instance of TestcaseEnv to keep track of this testcase's env vars + # and initialize some env vars as testCasePathName is available now + # ====================================================================== self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self) self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName - + self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName) + self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"] + # ====================================================================== - # initialize self.testcaseEnv with user-defined environment variables + # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json # ====================================================================== - self.testcaseEnv.userDefinedEnvVarDict["BROKER_SHUT_DOWN_COMPLETED_MSG"] = ReplicaBasicTest.brokerShutDownCompletedPattern - self.testcaseEnv.userDefinedEnvVarDict["REGX_BROKER_SHUT_DOWN_COMPLETED_PATTERN"] = \ - "\[(.*?)\] .* \[Kafka Server (.*?)\], " + ReplicaBasicTest.brokerShutDownCompletedPattern + testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"] - self.testcaseEnv.userDefinedEnvVarDict["LEADER_ELECTION_COMPLETED_MSG"] = ReplicaBasicTest.isLeaderLogPattern - self.testcaseEnv.userDefinedEnvVarDict["REGX_LEADER_ELECTION_PATTERN"] = \ - "\[(.*?)\] .* Broker (.*?): " + \ - self.testcaseEnv.userDefinedEnvVarDict["LEADER_ELECTION_COMPLETED_MSG"] + \ - " for topic (.*?) partition (.*?) 
\(.*" + if self.systemTestEnv.printTestDescriptionsOnly: + self.testcaseEnv.printTestCaseDescription(testcaseDirName) + continue + elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName): + self.log_message("Skipping : " + testcaseDirName) + skipThisTestCase = True + continue + else: + self.testcaseEnv.printTestCaseDescription(testcaseDirName) + system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName) + + # ============================================================================== # + # ============================================================================== # + # Product Specific Testing Code Starts Here: # + # ============================================================================== # + # ============================================================================== # + + # initialize self.testcaseEnv with user-defined environment variables (product specific) self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = "" self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False + self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"] = [] - # find testcase properties json file - testcasePropJsonPathName = system_test_utils.get_testcase_prop_json_pathname(testCasePathName) - self.logger.debug("testcasePropJsonPathName : " + testcasePropJsonPathName, extra=self.d) - - # get the dictionary that contains the testcase arguments and description - testcaseNonEntityDataDict = system_test_utils.get_json_dict_data(testcasePropJsonPathName) - - testcaseDirName = os.path.basename(testCasePathName) - self.testcaseEnv.testcaseResultsDict["test_case_name"] = testcaseDirName - - # update testcaseEnv - self.testcaseEnv.testCaseBaseDir = testCasePathName - self.testcaseEnv.testCaseLogsDir = self.testcaseEnv.testCaseBaseDir + "/logs" - self.testcaseEnv.testCaseDashboardsDir = self.testcaseEnv.testCaseBaseDir + "/dashboards" - - # get testcase description - testcaseDescription = "" - for k,v in testcaseNonEntityDataDict.items(): - if ( k == "description" ): testcaseDescription = v - - # TestcaseEnv.testcaseArgumentsDict initialized, this dictionary keeps track of the - # "testcase_args" in the testcase_properties.json such as replica_factor, num_partition, ... 
- self.testcaseEnv.testcaseArgumentsDict = testcaseNonEntityDataDict["testcase_args"] - - # ================================================================= - # TestcaseEnv environment settings initialization are completed here - # ================================================================= - # self.testcaseEnv.systemTestBaseDir - # self.testcaseEnv.testSuiteBaseDir - # self.testcaseEnv.testCaseBaseDir - # self.testcaseEnv.testCaseLogsDir - # self.testcaseEnv.testcaseArgumentsDict - - print - # display testcase name and arguments - self.log_message("Test Case : " + testcaseDirName) - for k,v in self.testcaseEnv.testcaseArgumentsDict.items(): - self.anonLogger.info(" " + k + " : " + v) - self.log_message("Description : " + testcaseDescription) - - # ================================================================ # - # ================================================================ # - # Product Specific Testing Code Starts Here: # - # ================================================================ # - # ================================================================ # - # initialize signal handler signal.signal(signal.SIGINT, self.signal_handler) @@ -154,11 +131,13 @@ # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file: # system_test/_testsuite/testcase_/testcase__properties.json - self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(testcasePropJsonPathName) + self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data( + self.testcaseEnv.testcasePropJsonPathName) # TestcaseEnv - initialize producer & consumer config / log file pathnames kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv) - + + # clean up data directories specified in zookeeper.properties and kafka_server_.properties kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv) @@ -171,7 +150,8 @@ # 2. 
update all properties files in system_test/_testsuite/testcase_/config # by overriding the settings specified in: # system_test/_testsuite/testcase_/testcase__properties.json - kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName, self.testcaseEnv, self.systemTestEnv) + kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName, + self.testcaseEnv, self.systemTestEnv) # ============================================= # preparing all entities to start the test @@ -183,30 +163,36 @@ self.log_message("starting brokers") kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv) - self.anonLogger.info("sleeping for 2s") - time.sleep(2) - + self.anonLogger.info("sleeping for 5s") + time.sleep(5) + self.log_message("creating topics") kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv) self.anonLogger.info("sleeping for 5s") time.sleep(5) - + # ============================================= # starting producer # ============================================= self.log_message("starting producer in the background") - kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv) - self.anonLogger.info("sleeping for 5s") - time.sleep(5) + kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False) + msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"] + self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages") + time.sleep(int(msgProducingFreeTimeSec)) + # ============================================= + # A while-loop to bounce leader as specified + # by "num_iterations" in testcase_n_properties.json + # ============================================= i = 1 numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"]) while i <= numIterations: self.log_message("Iteration " + str(i) + " of " + str(numIterations)) - # looking up leader - leaderDict = kafka_system_test_utils.get_leader_elected_log_line(self.systemTestEnv, self.testcaseEnv) + self.log_message("looking up leader") + leaderDict = kafka_system_test_utils.get_leader_elected_log_line( + self.systemTestEnv, self.testcaseEnv, self.leaderAttributesDict) # ========================== # leaderDict looks like this: @@ -226,15 +212,18 @@ self.testcaseEnv, leaderDict, self.testcaseEnv.validationStatusDict) # ============================================= - # get leader re-election latency by stopping leader + # trigger leader re-election by stopping leader + # to get re-election latency # ============================================= bounceLeaderFlag = self.testcaseEnv.testcaseArgumentsDict["bounce_leader"] self.log_message("bounce_leader flag : " + bounceLeaderFlag) if (bounceLeaderFlag.lower() == "true"): - reelectionLatency = kafka_system_test_utils.get_reelection_latency(self.systemTestEnv, self.testcaseEnv, leaderDict) + reelectionLatency = kafka_system_test_utils.get_reelection_latency( + self.systemTestEnv, self.testcaseEnv, leaderDict, self.leaderAttributesDict) latencyKeyName = "Leader Election Latency - iter " + str(i) + " brokerid " + leaderDict["brokerid"] self.testcaseEnv.validationStatusDict[latencyKeyName] = str("{0:.2f}".format(reelectionLatency * 1000)) + " ms" - + self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"].append("{0:.2f}".format(reelectionLatency * 1000)) + # ============================================= # starting previously terminated broker # 
=============================================
                     stoppedLeaderEntityId = leaderDict["entity_id"]
                     kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, stoppedLeaderEntityId)
 
-                    self.anonLogger.info("sleeping for 5s")
-                    time.sleep(5)
+                    self.anonLogger.info("sleeping for 15s")
+                    time.sleep(15)
 
                     i += 1
                 # while loop
 
+                self.testcaseEnv.validationStatusDict["Leader Election Latency MIN"] = None
+                try:
+                    # the latency list holds formatted strings such as "12.34",
+                    # so compare numerically rather than lexicographically
+                    self.testcaseEnv.validationStatusDict["Leader Election Latency MIN"] = \
+                        min(self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"], key=float)
+                except ValueError:
+                    # empty latency list
+                    pass
+
+                self.testcaseEnv.validationStatusDict["Leader Election Latency MAX"] = None
+                try:
+                    self.testcaseEnv.validationStatusDict["Leader Election Latency MAX"] = \
+                        max(self.testcaseEnv.userDefinedEnvVarDict["leaderElectionLatencyList"], key=float)
+                except ValueError:
+                    pass
+
+                # =============================================
                 # tell producer to stop
+                # =============================================
                 self.testcaseEnv.lock.acquire()
                 self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True
                 time.sleep(1)
                 self.testcaseEnv.lock.release()
                 time.sleep(1)
 
+                # =============================================
+                # wait for producer thread's update of
+                # "backgroundProducerStopped" to be "True"
+                # =============================================
                 while 1:
                     self.testcaseEnv.lock.acquire()
                     self.logger.info("status of backgroundProducerStopped : [" + \
                         str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d)
                     if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]:
                         time.sleep(1)
+                        self.logger.info("all producer threads completed", extra=self.d)
+                        # release the lock before leaving the loop so it is not held on exit
+                        self.testcaseEnv.lock.release()
                         break
                     time.sleep(1)
                     self.testcaseEnv.lock.release()
@@ -274,27 +284,34 @@
                 self.anonLogger.info("sleeping for 10s")
                 time.sleep(10)
 
-                # this testcase is completed - so stopping all entities
+                # =============================================
+                # this testcase is completed - stop all entities
+                # =============================================
                 self.log_message("stopping all entities")
-                for entityId, parentPid in self.testcaseEnv.entityParentPidDict.items():
+                for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items():
                     kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)
 
+                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
+                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)
+
                 # make sure all entities are stopped
                 kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)
 
-                # validate the data matched
                 # =============================================
-                self.log_message("validating data matched")
-                result = kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv)
-
-                # =============================================
                 # collect logs from remote hosts
                 # =============================================
                 kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)
 
-                # ==========================
+                # =============================================
+                # validate the data matched and checksum
+                # =============================================
+                self.log_message("validating data matched")
+                kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv)
+                kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv)
+
+                # =============================================
                 # draw graphs
-                # ==========================
+                # 
============================================= metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME, self.testcaseEnv, self.systemTestEnv.clusterEntityConfigDictList) @@ -303,30 +320,12 @@ metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME, self.testcaseEnv.testCaseDashboardsDir, self.systemTestEnv.clusterEntityConfigDictList) - except Exception as e: self.log_message("Exception while running test {0}".format(e)) traceback.print_exc() - traceback.print_exc() finally: - self.log_message("stopping all entities") + if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: + self.log_message("stopping all entities - please wait ...") + kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv) - for entityId, parentPid in self.testcaseEnv.entityParentPidDict.items(): - kafka_system_test_utils.force_stop_remote_entity(self.systemTestEnv, entityId, parentPid) - - for entityId, jmxParentPidList in self.testcaseEnv.entityJmxParentPidDict.items(): - for jmxParentPid in jmxParentPidList: - kafka_system_test_utils.force_stop_remote_entity(self.systemTestEnv, entityId, jmxParentPid) - - for hostname, consumerPPid in self.testcaseEnv.consumerHostParentPidDict.items(): - consumerEntityId = system_test_utils.get_data_by_lookup_keyval( \ - self.systemTestEnv.clusterEntityConfigDictList, "hostname", hostname, "entity_id") - kafka_system_test_utils.force_stop_remote_entity(self.systemTestEnv, consumerEntityId, consumerPPid) - - for hostname, producerPPid in self.testcaseEnv.producerHostParentPidDict.items(): - producerEntityId = system_test_utils.get_data_by_lookup_keyval( \ - self.systemTestEnv.clusterEntityConfigDictList, "hostname", hostname, "entity_id") - kafka_system_test_utils.force_stop_remote_entity(self.systemTestEnv, producerEntityId, producerPPid) - - Index: system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0001/testcase_0001_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : Base Test", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:-1, comp:0", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + 
"port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0101/testcase_0101_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : Base Test", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json 
=================================================================== --- system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0002/testcase_0002_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:-1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0102/testcase_0102_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. 
comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0021/testcase_0021_properties.json (revision 0) @@ -0,0 +1,94 @@ +{ + "description": {"01":"Replication Basic on Multi Topics & Partitions : Base Test", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:-1, comp:0", + "07":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": 
"/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0021/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_0021/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_0021/cluster_config.json (revision 0) @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} Index: system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json 
=================================================================== --- system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0003/testcase_0003_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. acks => 1; 2. comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0121/testcase_0121_properties.json (revision 0) @@ -0,0 +1,97 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : Base Test", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", 
+ "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0121/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_0121/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_0121/cluster_config.json (revision 0) @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": 
"default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} Index: system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0004/testcase_0004_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. mode => async; 2. comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:async, acks:-1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0103/testcase_0103_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. acks => 1; 2. 
comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0022/testcase_0022_properties.json (revision 0) @@ -0,0 +1,94 @@ +{ + "description": {"01":"Replication Basic on Multi Topics & Partitions : 1. acks => 1; 2. 
log segment size => 512K", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:1, comp:0", + "07":"Log segment size : 512000" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0022/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_0022/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_0022/cluster_config.json (revision 0) @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", +
"hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} Index: system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0023/testcase_0023_properties.json (revision 0) @@ -0,0 +1,94 @@ +{ + "description": {"01":"Replication Basic on Multi Topics & Partitions : 1. mode => async; 2. acks => 1; 3. comp => 1", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:async, acks:1, comp:1", + "07":"Log segment size : 512000" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + 
"log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0023/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_0023/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_0023/cluster_config.json (revision 0) @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} Index: system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0104/testcase_0104_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. 
comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:-1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0005/testcase_0005_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. mode => async; 2. acks => 1; 3. 
comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:async, acks:1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0122/testcase_0122_properties.json (revision 0) @@ -0,0 +1,97 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : 1. 
acks => 1", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:0", + "10":"Log segment size : 512000" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0122/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_0122/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_0122/cluster_config.json (revision 0) @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": 
"9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} Index: system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0123/testcase_0123_properties.json (revision 0) @@ -0,0 +1,97 @@ +{ + "description": {"01":"Leader Failure in Replication with multi topics & partitions : 1. mode => async; 2. comp => 0", + "02":"Produce and consume messages to 2 topics - 3 partitions", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 512000" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "512000", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance_4.log", + "config_filename": "producer_performance_4.properties" + }, + { + "entity_id": "5", + "topic": "test_2", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + 
"async":"true", + "log_filename": "producer_performance_5.log", + "config_filename": "producer_performance_5.properties" + }, + { + "entity_id": "6", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_6.log", + "config_filename": "console_consumer_6.properties" + }, + { + "entity_id": "7", + "topic": "test_2", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_7.log", + "config_filename": "console_consumer_7.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0123/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_0123/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_0123/cluster_config.json (revision 0) @@ -0,0 +1,76 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9997" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9998" + }, + { + "entity_id": "6", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9999" + }, + { + "entity_id": "7", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9099" + } + ] +} Index: system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0006/testcase_0006_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. 
comp => 1", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:-1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0105/testcase_0105_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. acks => 1; 3. 
comp => 1", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0106/testcase_0106_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. comp => 1; 2. 
no. of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0007/testcase_0007_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. mode => async; 2. 
comp => 1", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:async, acks:-1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0008/testcase_0008_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. acks => 1; 2. 
comp => 1", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:sync, acks:1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0107/testcase_0107_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. comp => 1; 3. 
no. of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:-1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0108/testcase_0108_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. acks => 1; 2. comp => 1; 3. no. 
of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0009/testcase_0009_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. mode => async; 2. acks => 1; 3. 
comp => 1", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:async, acks:1, comp:1", + "07":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0109/testcase_0109_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. acks => 1; 3. comp =>; 4. no. 
of partitions => 3", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_1/testcase_1_properties.json =================================================================== --- system_test/replication_testsuite/testcase_1/testcase_1_properties.json (revision 1396332) +++ system_test/replication_testsuite/testcase_1/testcase_1_properties.json (working copy) @@ -1,11 +1,22 @@ { - "description": "Basic test to produce and consume messages to a single topic partition. This test sends messages to n replicas and at the end verifies the log size and contents as well as using a consumer to verify no message loss. 
Optionally, the test bounces the leader periodically to introduce failures during the message replication.", + "description": {"01":"To Test : 'Leader Failure in Replication'", + "02":"Produce and consume messages to a single topic - single partition.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 10240" + }, "testcase_args": { "bounce_leader": "true", "replica_factor": "3", "num_partition": "2", "num_iteration": "2", "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", "num_messages_to_produce_per_producer_call": "50" }, "entities": [ @@ -47,9 +58,11 @@ "entity_id": "4", "topic": "test_1", "threads": "5", - "compression-codec": "1", + "compression-codec": "0", "message-size": "500", "message": "500", + "request-num-acks": "-1", + "async":"false", "log_filename": "producer_performance.log", "config_filename": "producer_performance.properties" }, @@ -58,6 +71,7 @@ "topic": "test_1", "groupid": "mytestgroup", "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", "log_filename": "console_consumer.log", "config_filename": "console_consumer.properties" } Index: system_test/replication_testsuite/testcase_1/cluster_config.json =================================================================== --- system_test/replication_testsuite/testcase_1/cluster_config.json (revision 0) +++ system_test/replication_testsuite/testcase_1/cluster_config.json (revision 0) @@ -0,0 +1,58 @@ +{ + "cluster_config": [ + { + "entity_id": "0", + "hostname": "localhost", + "role": "zookeeper", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9990" + }, + { + "entity_id": "1", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9991" + }, + { + "entity_id": "2", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9992" + }, + { + "entity_id": "3", + "hostname": "localhost", + "role": "broker", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9993" + }, + { + "entity_id": "4", + "hostname": "localhost", + "role": "producer_performance", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9994" + }, + { + "entity_id": "5", + "hostname": "localhost", + "role": "console_consumer", + "cluster_name": "source", + "kafka_home": "default", + "java_home": "default", + "jmx_port": "9995" + } + ] +} Index: system_test/replication_testsuite/config/producer_performance.properties =================================================================== --- system_test/replication_testsuite/config/producer_performance.properties (revision 1396332) +++ system_test/replication_testsuite/config/producer_performance.properties (working copy) @@ -2,3 +2,4 @@ message-size=100 thread=5 compression-codec=0 +request-num-acks=-1 Index: system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json 
=================================================================== --- system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json (revision 0) @@ -0,0 +1,76 @@ +{ + "description": {"01":"Replication Basic : 1. mode => async; 2. acks => 1; 3. comp => 1; 4. log segment size => 1M", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"At the end it verifies the log size and contents", + "05":"Use a consumer to verify no message loss.", + "06":"Producer dimensions : mode:async, acks:1, comp:1", + "07":"Log segment size : 1048576 (1M)" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "1048576", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "1048576", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "1048576", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +}
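The numbered "description" dicts used by these new testcase property files replace the old single-string description (compare the testcase_1 hunk above). A minimal sketch, not part of this patch (the helper name and hard-coded path are illustrative only), of how a report generator could render the steps in order:

import json

def print_testcase_description(propertiesPathName):
    # the "01".."10" keys sort lexicographically into step order
    testcaseData = json.loads(open(propertiesPathName, "r").read())
    for stepNum in sorted(testcaseData["description"].iterkeys()):
        print stepNum + " : " + testcaseData["description"][stepNum]

print_testcase_description("system_test/replication_testsuite/testcase_0010/testcase_0010_properties.json")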
Index: system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0110/testcase_0110_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Leader Failure in Replication : 1. mode => async; 2. acks => 1; 3. comp => 1; 4. no. of partitions => 3; 5. log segment size => 1M", + "02":"Produce and consume messages to a single topic - 3 partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:1, comp:1", + "10":"Log segment size : 1048576 (1M)" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "1048576", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "1048576", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "1048576", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "1", + "message-size": "500", + "message": "100", + "request-num-acks": "1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0111/testcase_0111_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : Base Test", + "02":"Produce and consume messages to a single topic - three partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:sync, acks:-1, comp:0", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, +
"entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json =================================================================== --- system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json (revision 0) +++ system_test/replication_testsuite/testcase_0112/testcase_0112_properties.json (revision 0) @@ -0,0 +1,79 @@ +{ + "description": {"01":"Multi Leader Failures in Replication : 1. 
mode => async", + "02":"Produce and consume messages to a single topic - three partitions.", + "03":"This test sends messages to 3 replicas", + "04":"To trigger leader election: find the leader and terminate by controlled failure (kill -15)", + "05":"Restart the terminated broker", + "06":"Lookup brokers' log4j messages and verify that leader is re-elected successfully", + "07":"At the end it verifies the log size and contents", + "08":"Use a consumer to verify no message loss.", + "09":"Producer dimensions : mode:async, acks:-1, comp:0", + "10":"Log segment size : 102400" + }, + "testcase_args": { + "bounce_leader": "true", + "replica_factor": "3", + "num_partition": "3", + "num_iteration": "3", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2188", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_2188.log", + "config_filename": "zookeeper_2188.properties" + }, + { + "entity_id": "1", + "port": "9091", + "brokerid": "1", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_1_logs", + "log_filename": "kafka_server_9091.log", + "config_filename": "kafka_server_9091.properties" + }, + { + "entity_id": "2", + "port": "9092", + "brokerid": "2", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_2_logs", + "log_filename": "kafka_server_9092.log", + "config_filename": "kafka_server_9092.properties" + }, + { + "entity_id": "3", + "port": "9093", + "brokerid": "3", + "log.file.size": "102400", + "log.dir": "/tmp/kafka_server_3_logs", + "log_filename": "kafka_server_9093.log", + "config_filename": "kafka_server_9093.properties" + }, + { + "entity_id": "4", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "100", + "request-num-acks": "-1", + "async":"true", + "log_filename": "producer_performance.log", + "config_filename": "producer_performance.properties" + }, + { + "entity_id": "5", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "zookeeper": "localhost:2188", + "log_filename": "console_consumer.log", + "config_filename": "console_consumer.properties" + } + ] +} Index: system_test/testcase_to_run_sanity.json =================================================================== --- system_test/testcase_to_run_sanity.json (revision 0) +++ system_test/testcase_to_run_sanity.json (revision 0) @@ -0,0 +1,5 @@ +{ + "ReplicaBasicTest" : [ + "testcase_1" + ] +} Index: system_test/utils/replication_utils.py =================================================================== --- system_test/utils/replication_utils.py (revision 0) +++ system_test/utils/replication_utils.py (revision 0) @@ -0,0 +1,60 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied.
See the License for the +# specific language governing permissions and limitations +# under the License. +#!/usr/bin/env python + +# ================================================================= +# replication_utils.py +# - This module defines constant values specific to Kafka Replication +# and also provides helper functions for Replication system test. +# ================================================================= + +import logging +import sys + +class ReplicationUtils(object): + + thisClassName = '(ReplicationUtils)' + d = {'name_of_class': thisClassName} + + logger = logging.getLogger("namedLogger") + anonLogger = logging.getLogger("anonymousLogger") + + def __init__(self, testClassInstance): + super(ReplicationUtils, self).__init__() + self.logger.debug("#### constructor inside ReplicationUtils", extra=self.d) + + # leader attributes + self.isLeaderLogPattern = "Completed the leader state transition" + self.brokerShutDownCompletedPattern = "shut down completed" + + self.leaderAttributesDict = {} + + self.leaderAttributesDict["BROKER_SHUT_DOWN_COMPLETED_MSG"] = \ + self.brokerShutDownCompletedPattern + + self.leaderAttributesDict["REGX_BROKER_SHUT_DOWN_COMPLETED_PATTERN"] = \ + "\[(.*?)\] .* \[Kafka Server (.*?)\], " + \ + self.brokerShutDownCompletedPattern + + self.leaderAttributesDict["LEADER_ELECTION_COMPLETED_MSG"] = \ + self.isLeaderLogPattern + + self.leaderAttributesDict["REGX_LEADER_ELECTION_PATTERN"] = \ + "\[(.*?)\] .* Broker (.*?): " + \ + self.leaderAttributesDict["LEADER_ELECTION_COMPLETED_MSG"] + \ + " for topic (.*?) partition (.*?) \(.*" + Index: system_test/utils/kafka_system_test_utils.py =================================================================== --- system_test/utils/kafka_system_test_utils.py (revision 1396332) +++ system_test/utils/kafka_system_test_utils.py (working copy) @@ -26,6 +26,7 @@ import json import logging import os +import pprint import re import subprocess import sys @@ -199,25 +200,30 @@ testcaseConfigsList = testcaseEnv.testcaseConfigsList testcasePathName = testcaseEnv.testCaseBaseDir - # consumer config / log files location - consEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ + try: + # consumer config / log files location + consEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ clusterConfigsList, "role", "console_consumer", "entity_id") - consLogList = system_test_utils.get_data_from_list_of_dicts( \ + consLogList = system_test_utils.get_data_from_list_of_dicts( \ testcaseConfigsList, "entity_id", consEntityIdList[0], "log_filename") - consLogPathname = testcasePathName + "/logs/" + consLogList[0] - consCfgList = system_test_utils.get_data_from_list_of_dicts( \ + consLogPathname = testcasePathName + "/logs/" + consLogList[0] + consCfgList = system_test_utils.get_data_from_list_of_dicts( \ testcaseConfigsList, "entity_id", consEntityIdList[0], "config_filename") - consCfgPathname = testcasePathName + "/config/" + consCfgList[0] + consCfgPathname = testcasePathName + "/config/" + consCfgList[0] - # producer config / log files location - prodEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ + # producer config / log files location + prodEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ clusterConfigsList, "role", "producer_performance", "entity_id") - prodLogList = system_test_utils.get_data_from_list_of_dicts( \ + prodLogList = system_test_utils.get_data_from_list_of_dicts( \ testcaseConfigsList, "entity_id", prodEntityIdList[0], "log_filename") - prodLogPathname = 
testcasePathName + "/logs/" + prodLogList[0] - prodCfgList = system_test_utils.get_data_from_list_of_dicts( \ + prodLogPathname = testcasePathName + "/logs/" + prodLogList[0] + prodCfgList = system_test_utils.get_data_from_list_of_dicts( \ testcaseConfigsList, "entity_id", prodEntityIdList[0], "config_filename") - prodCfgPathname = testcasePathName + "/config/" + prodCfgList[0] + prodCfgPathname = testcasePathName + "/config/" + prodCfgList[0] + except: + logger.error("Failed to initialize entity config/log path names: possibly mismatched " \ + + "number of entities in cluster_config.json & testcase_n_properties.json", extra=d) + raise testcaseEnv.userDefinedEnvVarDict["consumerLogPathName"] = consLogPathname testcaseEnv.userDefinedEnvVarDict["consumerConfigPathName"] = consCfgPathname @@ -225,7 +231,7 @@ testcaseEnv.userDefinedEnvVarDict["producerLogPathName"] = prodLogPathname testcaseEnv.userDefinedEnvVarDict["producerConfigPathName"] = prodCfgPathname -def copy_file_with_dict_values(srcFile, destFile, dictObj): +def copy_file_with_dict_values(srcFile, destFile, dictObj, keyValToAddDict): infile = open(srcFile, "r") inlines = infile.readlines() infile.close() @@ -236,9 +242,14 @@ if (line.startswith(key + "=")): line = key + "=" + dictObj[key] + "\n" outfile.write(line) + + if (keyValToAddDict is not None): + for key in sorted(keyValToAddDict.iterkeys()): + line = key + "=" + keyValToAddDict[key] + "\n" + outfile.write(line) + outfile.close() - def generate_overriden_props_files(testsuitePathname, testcaseEnv, systemTestEnv): logger.info("calling generate_properties_files", extra=d) @@ -253,50 +264,135 @@ # loop through all zookeepers (if more than 1) to retrieve host and clientPort # to construct a zk.connect str for broker in the form of: - # zk.connect=<host>:<port>,<host>:<port> - zkConnectStr = "" + # zk.connect=<host>:<port>,<host>:<port>,... + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] = "" + testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] = "" + testcaseEnv.userDefinedEnvVarDict["sourceZkEntityIdList"] = [] + testcaseEnv.userDefinedEnvVarDict["targetZkEntityIdList"] = [] + testcaseEnv.userDefinedEnvVarDict["sourceZkHostPortDict"] = {} + testcaseEnv.userDefinedEnvVarDict["targetZkHostPortDict"] = {} + testcaseEnv.userDefinedEnvVarDict["sourceBrokerEntityIdList"] = [] + testcaseEnv.userDefinedEnvVarDict["targetBrokerEntityIdList"] = [] + testcaseEnv.userDefinedEnvVarDict["sourceBrokerList"] = "" + testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] = "" + + # update zookeeper cluster info into "testcaseEnv.userDefinedEnvVarDict" zkDictList = system_test_utils.get_dict_from_list_of_dicts(clusterConfigsList, "role", "zookeeper") + for zkDict in zkDictList: entityID = zkDict["entity_id"] hostname = zkDict["hostname"] + clusterName = zkDict["cluster_name"] clientPortList = system_test_utils.get_data_from_list_of_dicts(tcConfigsList, "entity_id", entityID, "clientPort") clientPort = clientPortList[0] - if ( zkConnectStr.__len__() == 0 ): - zkConnectStr = hostname + ":" + clientPort + if clusterName == "source": + # update source cluster zookeeper entities + testcaseEnv.userDefinedEnvVarDict["sourceZkEntityIdList"].append(entityID) + if ( len(testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"]) == 0 ): + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] = hostname + ":" + clientPort + else: + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] += "," + hostname + ":" + clientPort + + # generate these strings for zookeeper config: + # server.1=host1:2180:2182 + # server.2=host2:2180:2182 + zkClusterSize = len(testcaseEnv.userDefinedEnvVarDict["sourceZkHostPortDict"]) + zkClusterId = str(zkClusterSize + 1) + key = "server." + zkClusterId + val = hostname + ":" + str(int(clientPort) - 1) + ":" + str(int(clientPort) + 1) + testcaseEnv.userDefinedEnvVarDict["sourceZkHostPortDict"][key] = val +
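The server.N entries built above encode the ZooKeeper quorum port convention this patch uses throughout: peer port = clientPort - 1, leader-election port = clientPort + 1. A standalone sketch of that derivation (hypothetical helper, not in the patch):

def make_zk_server_entry(zkServerId, hostname, clientPort):
    # e.g. clientPort 2188 => server.N=host:2187:2189
    peerPort = str(int(clientPort) - 1)
    electionPort = str(int(clientPort) + 1)
    return "server." + str(zkServerId) + "=" + hostname + ":" + peerPort + ":" + electionPort

print make_zk_server_entry(1, "localhost", "2188")   # server.1=localhost:2187:2189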
+ elif clusterName == "target": + # update target cluster zookeeper entities + testcaseEnv.userDefinedEnvVarDict["targetZkEntityIdList"].append(entityID) + if ( len(testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"]) == 0 ): + testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] = hostname + ":" + clientPort + else: + testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] += "," + hostname + ":" + clientPort + + # generate these strings for zookeeper config: + # server.1=host1:2180:2182 + # server.2=host2:2180:2182 + zkClusterSize = len(testcaseEnv.userDefinedEnvVarDict["targetZkHostPortDict"]) + zkClusterId = str(zkClusterSize + 1) + key = "server." + zkClusterId + val = hostname + ":" + str(int(clientPort) - 1) + ":" + str(int(clientPort) + 1) + testcaseEnv.userDefinedEnvVarDict["targetZkHostPortDict"][key] = val + else: - zkConnectStr = zkConnectStr + "," + hostname + ":" + clientPort + logger.error("Unknown cluster name: " + clusterName) + sys.exit(1) + # update broker cluster info into "testcaseEnv.userDefinedEnvVarDict" + brokerDictList = system_test_utils.get_dict_from_list_of_dicts(clusterConfigsList, "role", "broker") + for brokerDict in brokerDictList: + entityID = brokerDict["entity_id"] + hostname = brokerDict["hostname"] + clusterName = brokerDict["cluster_name"] + portList = system_test_utils.get_data_from_list_of_dicts(tcConfigsList, "entity_id", entityID, "port") + port = portList[0] + + if clusterName == "source": + if ( len(testcaseEnv.userDefinedEnvVarDict["sourceBrokerList"]) == 0 ): + testcaseEnv.userDefinedEnvVarDict["sourceBrokerList"] = hostname + ":" + port + else: + testcaseEnv.userDefinedEnvVarDict["sourceBrokerList"] += "," + hostname + ":" + port + elif clusterName == "target": + if ( len(testcaseEnv.userDefinedEnvVarDict["targetBrokerList"]) == 0 ): + testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] = hostname + ":" + port + else: + testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] += "," + hostname + ":" + port + else: + logger.error("Unknown cluster name: " + clusterName) + sys.exit(1) + # for each entity in the cluster config for clusterCfg in clusterConfigsList: cl_entity_id = clusterCfg["entity_id"] + # loop through testcase config list 'tcConfigsList' for a matching cluster entity_id for tcCfg in tcConfigsList: if (tcCfg["entity_id"] == cl_entity_id): # copy the associated .properties template, update values, write to testcase_<n>/config if ( clusterCfg["role"] == "broker" ): - tcCfg["zk.connect"] = zkConnectStr - copy_file_with_dict_values(cfgTemplatePathname + "/server.properties", \ - cfgDestPathname + "/" + tcCfg["config_filename"], tcCfg) + if clusterCfg["cluster_name"] == "source": + tcCfg["zk.connect"] = testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] + elif clusterCfg["cluster_name"] == "target": + tcCfg["zk.connect"] = testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] + else: + logger.error("Unknown cluster name: " + clusterCfg["cluster_name"]) + sys.exit(1) + copy_file_with_dict_values(cfgTemplatePathname + "/server.properties", + cfgDestPathname + "/" + tcCfg["config_filename"], tcCfg, None) + elif ( clusterCfg["role"] == "zookeeper"): + if clusterCfg["cluster_name"] ==
"source": + copy_file_with_dict_values(cfgTemplatePathname + "/zookeeper.properties", + cfgDestPathname + "/" + tcCfg["config_filename"], tcCfg, + testcaseEnv.userDefinedEnvVarDict["sourceZkHostPortDict"]) + elif clusterCfg["cluster_name"] == "target": + copy_file_with_dict_values(cfgTemplatePathname + "/zookeeper.properties", + cfgDestPathname + "/" + tcCfg["config_filename"], tcCfg, + testcaseEnv.userDefinedEnvVarDict["targetZkHostPortDict"]) + else: + logger.error("Unknown cluster name: " + clusterName) + sys.exit(1) - elif ( clusterCfg["role"] == "producer_performance"): - #tcCfg["brokerinfo"] = "zk.connect" + "=" + zkConnectStr - copy_file_with_dict_values(cfgTemplatePathname + "/producer_performance.properties", \ - cfgDestPathname + "/" + tcCfg["config_filename"], tcCfg) + elif ( clusterCfg["role"] == "mirror_maker"): + tcCfg["broker.list"] = testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] + copy_file_with_dict_values(cfgTemplatePathname + "/mirror_producer.properties", + cfgDestPathname + "/" + tcCfg["mirror_producer_config_filename"], tcCfg, None) - elif ( clusterCfg["role"] == "console_consumer"): - tcCfg["zookeeper"] = zkConnectStr - copy_file_with_dict_values(cfgTemplatePathname + "/console_consumer.properties", \ - cfgDestPathname + "/" + tcCfg["config_filename"], tcCfg) + # update zk.connect with the zk entities specified in cluster_config.json + tcCfg["zk.connect"] = testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] + copy_file_with_dict_values(cfgTemplatePathname + "/mirror_consumer.properties", + cfgDestPathname + "/" + tcCfg["mirror_consumer_config_filename"], tcCfg, None) else: - print " => ", tcCfg - print "UNHANDLED key" + logger.debug("UNHANDLED role " + clusterCfg["role"], extra=d) # scp updated config files to remote hosts scp_file_to_remote_host(clusterConfigsList, testcaseEnv) @@ -318,25 +414,64 @@ def start_zookeepers(systemTestEnv, testcaseEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - zkEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ - clusterEntityConfigDictList, "role", "zookeeper", "entity_id") + zkEntityIdList = system_test_utils.get_data_from_list_of_dicts( + clusterEntityConfigDictList, "role", "zookeeper", "entity_id") for zkEntityId in zkEntityIdList: + configPathName = get_testcase_config_log_dir_pathname(testcaseEnv, "zookeeper", zkEntityId, "config") + configFile = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", zkEntityId, "config_filename") + clientPort = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", zkEntityId, "clientPort") + dataDir = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", zkEntityId, "dataDir") + hostname = system_test_utils.get_data_by_lookup_keyval( + clusterEntityConfigDictList, "entity_id", zkEntityId, "hostname") + minusOnePort = str(int(clientPort) - 1) + plusOnePort = str(int(clientPort) + 1) + + # read configFile to find out the id of the zk and create the file "myid" + infile = open(configPathName + "/" + configFile, "r") + inlines = infile.readlines() + infile.close() + + for line in inlines: + if line.startswith("server.") and hostname + ":" + minusOnePort + ":" + plusOnePort in line: + # server.1=host1:2187:2189 + matchObj = re.match("server\.(.*?)=.*", line) + zkServerId = matchObj.group(1) + + cmdStr = "ssh " + hostname + " 'mkdir -p " + dataDir + "; echo " + zkServerId + " > " + dataDir + "/myid'" + logger.debug("executing 
@@ -318,25 +414,64 @@ def start_zookeepers(systemTestEnv, testcaseEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - zkEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ - clusterEntityConfigDictList, "role", "zookeeper", "entity_id") + zkEntityIdList = system_test_utils.get_data_from_list_of_dicts( + clusterEntityConfigDictList, "role", "zookeeper", "entity_id") for zkEntityId in zkEntityIdList: + configPathName = get_testcase_config_log_dir_pathname(testcaseEnv, "zookeeper", zkEntityId, "config") + configFile = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", zkEntityId, "config_filename") + clientPort = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", zkEntityId, "clientPort") + dataDir = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", zkEntityId, "dataDir") + hostname = system_test_utils.get_data_by_lookup_keyval( + clusterEntityConfigDictList, "entity_id", zkEntityId, "hostname") + minusOnePort = str(int(clientPort) - 1) + plusOnePort = str(int(clientPort) + 1) + + # read configFile to find out the id of the zk and create the file "myid" + infile = open(configPathName + "/" + configFile, "r") + inlines = infile.readlines() + infile.close() + + for line in inlines: + if line.startswith("server.") and hostname + ":" + minusOnePort + ":" + plusOnePort in line: + # server.1=host1:2187:2189 + matchObj = re.match("server\.(.*?)=.*", line) + zkServerId = matchObj.group(1) + + cmdStr = "ssh " + hostname + " 'mkdir -p " + dataDir + "; echo " + zkServerId + " > " + dataDir + "/myid'" + logger.debug("executing command [" + cmdStr + "]", extra=d) + subproc = system_test_utils.sys_call_return_subproc(cmdStr) + for line in subproc.stdout.readlines(): + pass # dummy loop to wait until the remote myid command completes + + time.sleep(2) start_entity_in_background(systemTestEnv, testcaseEnv, zkEntityId) - def start_brokers(systemTestEnv, testcaseEnv): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ - clusterEntityConfigDictList, "role", "broker", "entity_id") + brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + clusterEntityConfigDictList, "role", "broker", "entity_id") for brokerEntityId in brokerEntityIdList: start_entity_in_background(systemTestEnv, testcaseEnv, brokerEntityId) -def get_broker_shutdown_log_line(systemTestEnv, testcaseEnv): +def start_mirror_makers(systemTestEnv, testcaseEnv): + clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList + mirrorMakerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + clusterEntityConfigDictList, "role", "mirror_maker", "entity_id") + + for mirrorMakerEntityId in mirrorMakerEntityIdList: + start_entity_in_background(systemTestEnv, testcaseEnv, mirrorMakerEntityId) + +
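The start_zookeepers changes above derive each host's ZooKeeper id from its own server.N line and write it to dataDir/myid, which ZooKeeper requires for quorum membership. A self-contained sketch of that id lookup (hypothetical helper mirroring the loop above):

import re

def derive_zk_server_id(propsLines, hostname, clientPort):
    # find the server.N line matching this host's peer/election ports
    hostPortStr = hostname + ":" + str(int(clientPort) - 1) + ":" + str(int(clientPort) + 1)
    for line in propsLines:
        if line.startswith("server.") and hostPortStr in line:
            matchObj = re.match("server\.(.*?)=.*", line)
            if matchObj is not None:
                return matchObj.group(1)   # the id to write into dataDir/myid
    return None

print derive_zk_server_id(["server.1=localhost:2187:2189\n"], "localhost", "2188")   # 1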
+def get_broker_shutdown_log_line(systemTestEnv, testcaseEnv, leaderAttributesDict): + logger.info("looking up broker shutdown...", extra=d) # keep track of broker related data in this dict such as broker id, @@ -344,22 +479,19 @@ shutdownBrokerDict = {} clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ + brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts( clusterEntityConfigDictList, "role", "broker", "entity_id") for brokerEntityId in brokerEntityIdList: - hostname = system_test_utils.get_data_by_lookup_keyval( \ + hostname = system_test_utils.get_data_by_lookup_keyval( clusterEntityConfigDictList, "entity_id", brokerEntityId, "hostname") - logFile = system_test_utils.get_data_by_lookup_keyval( \ + logFile = system_test_utils.get_data_by_lookup_keyval( testcaseEnv.testcaseConfigsList, "entity_id", brokerEntityId, "log_filename") - shutdownBrokerDict["entity_id"] = brokerEntityId - shutdownBrokerDict["hostname"] = hostname - logPathName = get_testcase_config_log_dir_pathname(testcaseEnv, "broker", brokerEntityId, "default") cmdStrList = ["ssh " + hostname, - "\"grep -i -h '" + testcaseEnv.userDefinedEnvVarDict["BROKER_SHUT_DOWN_COMPLETED_MSG"] + "' ", + "\"grep -i -h '" + leaderAttributesDict["BROKER_SHUT_DOWN_COMPLETED_MSG"] + "' ", logPathName + "/" + logFile + " | ", "sort | tail -1\""] cmdStr = " ".join(cmdStrList) @@ -370,30 +502,35 @@ line = line.rstrip('\n') - if testcaseEnv.userDefinedEnvVarDict["BROKER_SHUT_DOWN_COMPLETED_MSG"] in line: + if leaderAttributesDict["BROKER_SHUT_DOWN_COMPLETED_MSG"] in line: logger.debug("found the log line : " + line, extra=d) try: - matchObj = re.match(testcaseEnv.userDefinedEnvVarDict["REGX_BROKER_SHUT_DOWN_COMPLETED_PATTERN"], line) + matchObj = re.match(leaderAttributesDict["REGX_BROKER_SHUT_DOWN_COMPLETED_PATTERN"], line) datetimeStr = matchObj.group(1) datetimeObj = datetime.strptime(datetimeStr, "%Y-%m-%d %H:%M:%S,%f") unixTs = time.mktime(datetimeObj.timetuple()) + 1e-6*datetimeObj.microsecond #print "{0:.3f}".format(unixTs) - shutdownBrokerDict["timestamp"] = unixTs - shutdownBrokerDict["brokerid"] = matchObj.group(2) - logger.debug("brokerid: [" + shutdownBrokerDict["brokerid"] + "] entity_id: [" + shutdownBrokerDict["entity_id"] + "]", extra=d) - return shutdownBrokerDict + + # update shutdownBrokerDict when + # 1. shutdownBrokerDict has no logline entry + # 2. shutdownBrokerDict has existing logline entry but found another logline with more recent timestamp + if (len(shutdownBrokerDict) > 0 and shutdownBrokerDict["timestamp"] < unixTs) or (len(shutdownBrokerDict) == 0): + shutdownBrokerDict["timestamp"] = unixTs + shutdownBrokerDict["brokerid"] = matchObj.group(2) + shutdownBrokerDict["hostname"] = hostname + shutdownBrokerDict["entity_id"] = brokerEntityId + logger.debug("brokerid: [" + shutdownBrokerDict["brokerid"] + \ + "] entity_id: [" + shutdownBrokerDict["entity_id"] + "]", extra=d) except: logger.error("ERROR [unable to find matching leader details: Has the matching pattern changed?]", extra=d) raise - #else: - # logger.debug("unmatched line found [" + line + "]", extra=d) return shutdownBrokerDict -def get_leader_elected_log_line(systemTestEnv, testcaseEnv): +def get_leader_elected_log_line(systemTestEnv, testcaseEnv, leaderAttributesDict): - logger.info("looking up leader...", extra=d) + logger.debug("looking up leader...", extra=d) # keep track of leader related data in this dict such as broker id, # entity id and timestamp and return it to the caller function @@ -412,7 +549,7 @@ logPathName = get_testcase_config_log_dir_pathname(testcaseEnv, "broker", brokerEntityId, "default") cmdStrList = ["ssh " + hostname, - "\"grep -i -h '" + testcaseEnv.userDefinedEnvVarDict["LEADER_ELECTION_COMPLETED_MSG"] + "' ", + "\"grep -i -h '" + leaderAttributesDict["LEADER_ELECTION_COMPLETED_MSG"] + "' ", logPathName + "/" + logFile + " | ", "sort | tail -1\""] cmdStr = " ".join(cmdStrList) @@ -423,10 +560,10 @@ line = line.rstrip('\n') - if testcaseEnv.userDefinedEnvVarDict["LEADER_ELECTION_COMPLETED_MSG"] in line: + if leaderAttributesDict["LEADER_ELECTION_COMPLETED_MSG"] in line: logger.debug("found the log line : " + line, extra=d) try: - matchObj = re.match(testcaseEnv.userDefinedEnvVarDict["REGX_LEADER_ELECTION_PATTERN"], line) + matchObj = re.match(leaderAttributesDict["REGX_LEADER_ELECTION_PATTERN"], line) datetimeStr = matchObj.group(1) datetimeObj = datetime.strptime(datetimeStr, "%Y-%m-%d %H:%M:%S,%f") unixTs = time.mktime(datetimeObj.timetuple()) + 1e-6*datetimeObj.microsecond @@ -462,6 +599,7 @@ kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", entityId, "kafka_home") javaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", entityId, "java_home") jmxPort = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", entityId, "jmx_port") + clusterName = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", entityId, "cluster_name") # testcase configurations: testcaseConfigsList = testcaseEnv.testcaseConfigsList @@ -469,6 +607,11 @@ configFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "config_filename") logFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "log_filename") + mmConsumerConfigFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, + "mirror_consumer_config_filename") + mmProducerConfigFile = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, + "mirror_producer_config_filename") + logger.info("starting " + role + " in host [" + hostname + "] on client port [
+ clientPort + "]", extra=d) configPathName = get_testcase_config_log_dir_pathname(testcaseEnv, role, entityId, "config") @@ -483,13 +626,6 @@ logPathName + "/" + logFile + " & echo pid:$! > ", logPathName + "/entity_" + entityId + "_pid'"] - # construct zk.connect str and update it to testcaseEnv.userDefinedEnvVarDict.zkConnectStr - if ( len(testcaseEnv.userDefinedEnvVarDict["zkConnectStr"]) > 0 ): - testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = \ - testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] + "," + hostname + ":" + clientPort - else: - testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = hostname + ":" + clientPort - elif role == "broker": cmdList = ["ssh " + hostname, "'JAVA_HOME=" + javaHome, @@ -499,6 +635,17 @@ logPathName + "/" + logFile + " & echo pid:$! > ", logPathName + "/entity_" + entityId + "_pid'"] + elif role == "mirror_maker": + cmdList = ["ssh " + hostname, + "'JAVA_HOME=" + javaHome, + "JMX_PORT=" + jmxPort, + kafkaHome + "/bin/kafka-run-class.sh kafka.tools.MirrorMaker", + "--consumer.config " + configPathName + "/" + mmConsumerConfigFile, + "--producer.config " + configPathName + "/" + mmProducerConfigFile, + "--whitelist=\".*\" >> ", + logPathName + "/" + logFile + " & echo pid:$! > ", + logPathName + "/entity_" + entityId + "_pid'"] + cmdStr = " ".join(cmdList) logger.debug("executing command: [" + cmdStr + "]", extra=d) @@ -515,31 +662,35 @@ line = line.rstrip('\n') logger.debug("found pid line: [" + line + "]", extra=d) tokens = line.split(':') - testcaseEnv.entityParentPidDict[entityId] = tokens[1] - #print "\n#### testcaseEnv.entityParentPidDict ", testcaseEnv.entityParentPidDict, "\n" + if role == "zookeeper": + testcaseEnv.entityZkParentPidDict[entityId] = tokens[1] + elif role == "broker": + testcaseEnv.entityBrokerParentPidDict[entityId] = tokens[1] + elif role == "mirror_maker": + testcaseEnv.entityMirrorMakerParentPidDict[entityId] = tokens[1] time.sleep(1) - metrics.start_metrics_collection(hostname, jmxPort, role, entityId, systemTestEnv, testcaseEnv) + if role != "mirror_maker": + metrics.start_metrics_collection(hostname, jmxPort, role, entityId, systemTestEnv, testcaseEnv) def start_console_consumer(systemTestEnv, testcaseEnv): - clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList + clusterList = systemTestEnv.clusterEntityConfigDictList - consumerConfigList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "console_consumer") + consumerConfigList = system_test_utils.get_dict_from_list_of_dicts(clusterList, "role", "console_consumer") for consumerConfig in consumerConfigList: host = consumerConfig["hostname"] entityId = consumerConfig["entity_id"] jmxPort = consumerConfig["jmx_port"] - role = consumerConfig["role"] - kafkaHome = system_test_utils.get_data_by_lookup_keyval( \ - clusterEntityConfigDictList, "entity_id", entityId, "kafka_home") - javaHome = system_test_utils.get_data_by_lookup_keyval( \ - clusterEntityConfigDictList, "entity_id", entityId, "java_home") - jmxPort = system_test_utils.get_data_by_lookup_keyval( \ - clusterEntityConfigDictList, "entity_id", entityId, "jmx_port") + role = consumerConfig["role"] + clusterName = consumerConfig["cluster_name"] + kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "kafka_home") + javaHome = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "java_home") + jmxPort = system_test_utils.get_data_by_lookup_keyval(clusterList, "entity_id", entityId, "jmx_port") 
kafkaRunClassBin = kafkaHome + "/bin/kafka-run-class.sh" + logger.info("starting console consumer", extra=d) consumerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "console_consumer", entityId, "default") @@ -547,12 +698,40 @@ testcaseEnv.userDefinedEnvVarDict["consumerLogPathName"] = consumerLogPathName - commandArgs = system_test_utils.convert_keyval_to_cmd_args(testcaseEnv.userDefinedEnvVarDict["consumerConfigPathName"]) + # testcase configurations: + testcaseList = testcaseEnv.testcaseConfigsList + topic = system_test_utils.get_data_by_lookup_keyval(testcaseList, "entity_id", entityId, "topic") + timeoutMs = system_test_utils.get_data_by_lookup_keyval(testcaseList, "entity_id", entityId, "consumer-timeout-ms") + + + formatterOption = "" + try: + formatterOption = system_test_utils.get_data_by_lookup_keyval(testcaseList, "entity_id", entityId, "formatter") + except: + pass + + if len(formatterOption) > 0: + formatterOption = " --formatter " + formatterOption + " " + + zkConnectStr = "" + if clusterName == "source": + zkConnectStr = testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] + elif clusterName == "target": + zkConnectStr = testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] + else: + logger.error("Invalid cluster name : " + clusterName) + sys.exit(1) + cmdList = ["ssh " + host, "'JAVA_HOME=" + javaHome, "JMX_PORT=" + jmxPort, kafkaRunClassBin + " kafka.consumer.ConsoleConsumer", - commandArgs + " >> " + consumerLogPathName, + "--zookeeper " + zkConnectStr, + "--topic " + topic, + "--consumer-timeout-ms " + timeoutMs, + formatterOption, + "--from-beginning ", + " >> " + consumerLogPathName, " & echo pid:$! > " + consumerLogPath + "/entity_" + entityId + "_pid'"] cmdStr = " ".join(cmdList) @@ -574,7 +753,7 @@ tokens = line.split(':') testcaseEnv.consumerHostParentPidDict[host] = tokens[1] -def start_producer_performance(systemTestEnv, testcaseEnv): +def start_producer_performance(systemTestEnv, testcaseEnv, kafka07Client): entityConfigList = systemTestEnv.clusterEntityConfigDictList testcaseConfigsList = testcaseEnv.testcaseConfigsList @@ -588,11 +767,6 @@ entityId = entityConfig["entity_id"] port = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "port") - if len(brokerListStr) == 0: - brokerListStr = hostname + ":" + port - else: - brokerListStr = brokerListStr + "," + hostname + ":" + port - producerConfigList = system_test_utils.get_dict_from_list_of_dicts(entityConfigList, "role", "producer_performance") for producerConfig in producerConfigList: host = producerConfig["hostname"] @@ -600,41 +774,69 @@ jmxPort = producerConfig["jmx_port"] role = producerConfig["role"] - thread.start_new_thread(start_producer_in_thread, (testcaseEnv, entityConfigList, producerConfig, brokerListStr)) + thread.start_new_thread(start_producer_in_thread, (testcaseEnv, entityConfigList, producerConfig, kafka07Client)) + testcaseEnv.lock.acquire() + testcaseEnv.numProducerThreadsRunning += 1 + logger.debug("testcaseEnv.numProducerThreadsRunning : " + str(testcaseEnv.numProducerThreadsRunning), extra=d) time.sleep(1) + testcaseEnv.lock.release() + time.sleep(1) metrics.start_metrics_collection(host, jmxPort, role, entityId, systemTestEnv, testcaseEnv) -def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, brokerListStr): +def start_producer_in_thread(testcaseEnv, entityConfigList, producerConfig, kafka07Client): host = producerConfig["hostname"] entityId = producerConfig["entity_id"] jmxPort = producerConfig["jmx_port"] - 
role = producerConfig["role"] + role = producerConfig["role"] + clusterName = producerConfig["cluster_name"] kafkaHome = system_test_utils.get_data_by_lookup_keyval(entityConfigList, "entity_id", entityId, "kafka_home") javaHome = system_test_utils.get_data_by_lookup_keyval(entityConfigList, "entity_id", entityId, "java_home") jmxPort = system_test_utils.get_data_by_lookup_keyval(entityConfigList, "entity_id", entityId, "jmx_port") kafkaRunClassBin = kafkaHome + "/bin/kafka-run-class.sh" + # testcase configurations: + testcaseConfigsList = testcaseEnv.testcaseConfigsList + topic = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "topic") + threads = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "threads") + compCodec = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "compression-codec") + messageSize = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "message-size") + noMsgPerBatch = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "message") + requestNumAcks = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "request-num-acks") + asyncMode = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "async") + + brokerListStr = "" + if clusterName == "source": + brokerListStr = testcaseEnv.userDefinedEnvVarDict["sourceBrokerList"] + elif clusterName == "target": + brokerListStr = testcaseEnv.userDefinedEnvVarDict["targetBrokerList"] + else: + logger.error("Unknown cluster name: " + clusterName) + sys.exit(1) + logger.info("starting producer performance", extra=d) producerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "producer_performance", entityId, "default") producerLogPathName = producerLogPath + "/producer_performance.log" testcaseEnv.userDefinedEnvVarDict["producerLogPathName"] = producerLogPathName - commandArgs = system_test_utils.convert_keyval_to_cmd_args(testcaseEnv.userDefinedEnvVarDict["producerConfigPathName"]) counter = 0 - noMsgPerBatch = int(testcaseEnv.testcaseArgumentsDict["num_messages_to_produce_per_producer_call"]) producerSleepSec = int(testcaseEnv.testcaseArgumentsDict["sleep_seconds_between_producer_calls"]) - # keep calling producer until signaled by: + boolArgumentsStr = "" + if asyncMode.lower() == "true": + boolArgumentsStr = boolArgumentsStr + " --async" + + # keep calling producer until signaled to stop by: # testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] while 1: + logger.debug("calling testcaseEnv.lock.acquire()", extra=d) testcaseEnv.lock.acquire() if not testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"]: - initMsgId = counter * noMsgPerBatch + initMsgId = counter * int(noMsgPerBatch) - logger.info("#### [producer thread] status of stopBackgroundProducer : [False] => producing [" + str(noMsgPerBatch) + \ - "] messages with starting message id : [" + str(initMsgId) + "]", extra=d) + logger.info("#### [producer thread] status of stopBackgroundProducer : [False] => producing [" \ + + str(noMsgPerBatch) + "] messages with starting message id : [" + str(initMsgId) + "]", extra=d) cmdList = ["ssh " + host, "'JAVA_HOME=" + javaHome, @@ -642,10 +844,45 @@ kafkaRunClassBin + " kafka.perf.ProducerPerformance", "--broker-list " + brokerListStr, "--initial-message-id " + str(initMsgId), - "--messages " + str(noMsgPerBatch), - commandArgs + " >> " + producerLogPathName, + "--messages " + noMsgPerBatch, + "--topic " + topic, + "--threads " + threads, + "--compression-codec " + compCodec, + "--message-size " + messageSize, + "--request-num-acks " + requestNumAcks, + boolArgumentsStr, + " >> " + producerLogPathName, " & echo pid:$! > " + producerLogPath + "/entity_" + entityId + "_pid'"]
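The kafka07Client branch that follows rebuilds this command for a 0.7 producer, which takes --brokerinfo with an indexed "broker.list=<index>:<host>:<port>,..." value rather than 0.8's --broker-list. A sketch of that list conversion (hypothetical helper with the same logic as the loop below):

def to_07_brokerinfo(brokerListStr):
    # prefix each host:port token with a 1-based broker index
    indexedTokens = []
    index = 1
    for token in brokerListStr.split(','):
        indexedTokens.append(str(index) + ":" + token)
        index += 1
    return "broker.list=" + ",".join(indexedTokens)

print to_07_brokerinfo("localhost:9091,localhost:9092")   # broker.list=1:localhost:9091,2:localhost:9092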
+ "--messages " + noMsgPerBatch, + "--topic " + topic, + "--threads " + threads, + "--compression-codec " + compCodec, + "--message-size " + messageSize, + "--request-num-acks " + requestNumAcks, + boolArgumentsStr, + " >> " + producerLogPathName, " & echo pid:$! > " + producerLogPath + "/entity_" + entityId + "_pid'"] + if kafka07Client: + cmdList[:] = [] + + brokerInfoStr = "" + tokenList = brokerListStr.split(',') + index = 1 + for token in tokenList: + if len(brokerInfoStr) == 0: + brokerInfoStr = str(index) + ":" + token + else: + brokerInfoStr += "," + str(index) + ":" + token + index += 1 + + brokerInfoStr = "broker.list=" + brokerInfoStr + + cmdList = ["ssh " + host, + "'JAVA_HOME=" + javaHome, + "JMX_PORT=" + jmxPort, + kafkaRunClassBin + " kafka.perf.ProducerPerformance", + "--brokerinfo " + brokerInfoStr, + "--messages " + noMsgPerBatch, + "--topic " + topic, + "--threads " + threads, + "--compression-codec " + compCodec, + "--message-size " + messageSize, + "--vary-message-size --async", + " >> " + producerLogPathName, + " & echo pid:$! > " + producerLogPath + "/entity_" + entityId + "_pid'"] + cmdStr = " ".join(cmdList) logger.debug("executing command: [" + cmdStr + "]", extra=d) @@ -653,21 +890,30 @@ for line in subproc.stdout.readlines(): pass # dummy loop to wait until producer is completed else: + testcaseEnv.numProducerThreadsRunning -= 1 + logger.debug("testcaseEnv.numProducerThreadsRunning : " + str(testcaseEnv.numProducerThreadsRunning), extra=d) testcaseEnv.lock.release() break counter += 1 + logger.debug("calling testcaseEnv.lock.release()", extra=d) testcaseEnv.lock.release() time.sleep(int(producerSleepSec)) - # let the main testcase know producer has stopped - testcaseEnv.lock.acquire() - testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = True - time.sleep(1) - testcaseEnv.lock.release() - time.sleep(1) + # wait until other producer threads also stops and + # let the main testcase know all producers have stopped + while 1: + testcaseEnv.lock.acquire() + time.sleep(1) + if testcaseEnv.numProducerThreadsRunning == 0: + testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = True + testcaseEnv.lock.release() + break + else: + logger.debug("waiting for TRUE of testcaseEnv.userDefinedEnvVarDict['backgroundProducerStopped']", extra=d) + testcaseEnv.lock.release() + time.sleep(1) - def stop_remote_entity(systemTestEnv, entityId, parentPid): clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList @@ -676,8 +922,6 @@ logger.debug("terminating process id: " + parentPid + " in host: " + hostname, extra=d) system_test_utils.sigterm_remote_process(hostname, pidStack) -# time.sleep(1) -# system_test_utils.sigkill_remote_process(hostname, pidStack) def force_stop_remote_entity(systemTestEnv, entityId, parentPid): @@ -694,34 +938,48 @@ clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList prodPerfCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "producer_performance") - prodPerfCfgDict = system_test_utils.get_dict_from_list_of_dicts(testcaseEnv.testcaseConfigsList, "entity_id", prodPerfCfgList[0]["entity_id"]) - prodTopicList = prodPerfCfgDict[0]["topic"].split(',') - zkEntityId = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "role", "zookeeper", "entity_id") - zkHost = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "role", "zookeeper", "hostname") - kafkaHome = 
system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", zkEntityId, "kafka_home") - javaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", zkEntityId, "java_home") - createTopicBin = kafkaHome + "/bin/kafka-create-topic.sh" + for prodPerfCfg in prodPerfCfgList: + topic = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", prodPerfCfg["entity_id"], "topic") + zkEntityId = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "role", "zookeeper", "entity_id") + zkHost = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "role", "zookeeper", "hostname") + kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", zkEntityId, "kafka_home") + javaHome = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", zkEntityId, "java_home") + createTopicBin = kafkaHome + "/bin/kafka-create-topic.sh" - logger.info("zkEntityId : " + zkEntityId, extra=d) - logger.info("createTopicBin : " + createTopicBin, extra=d) + logger.debug("zkEntityId : " + zkEntityId, extra=d) + logger.debug("createTopicBin : " + createTopicBin, extra=d) - for topic in prodTopicList: - logger.info("creating topic: [" + topic + "] at: [" + testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] + "]", extra=d) - cmdList = ["ssh " + zkHost, - "'JAVA_HOME=" + javaHome, - createTopicBin, - " --topic " + topic, - " --zookeeper " + testcaseEnv.userDefinedEnvVarDict["zkConnectStr"], - " --replica " + testcaseEnv.testcaseArgumentsDict["replica_factor"], - " --partition " + testcaseEnv.testcaseArgumentsDict["num_partition"] + " &> ", - testcaseEnv.testCaseBaseDir + "/logs/create_topic.log'"] + if len(testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"]) > 0: + logger.info("creating topic: [" + topic + "] at: [" + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"] + "]", extra=d) + cmdList = ["ssh " + zkHost, + "'JAVA_HOME=" + javaHome, + createTopicBin, + " --topic " + topic, + " --zookeeper " + testcaseEnv.userDefinedEnvVarDict["sourceZkConnectStr"], + " --replica " + testcaseEnv.testcaseArgumentsDict["replica_factor"], + " --partition " + testcaseEnv.testcaseArgumentsDict["num_partition"] + " >> ", + testcaseEnv.testCaseBaseDir + "/logs/create_source_cluster_topic.log'"] - cmdStr = " ".join(cmdList) - logger.debug("executing command: [" + cmdStr + "]", extra=d) - subproc = system_test_utils.sys_call_return_subproc(cmdStr) + cmdStr = " ".join(cmdList) + logger.debug("executing command: [" + cmdStr + "]", extra=d) + subproc = system_test_utils.sys_call_return_subproc(cmdStr) + if len(testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"]) > 0: + logger.info("creating topic: [" + topic + "] at: [" + testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"] + "]", extra=d) + cmdList = ["ssh " + zkHost, + "'JAVA_HOME=" + javaHome, + createTopicBin, + " --topic " + topic, + " --zookeeper " + testcaseEnv.userDefinedEnvVarDict["targetZkConnectStr"], + " --replica " + testcaseEnv.testcaseArgumentsDict["replica_factor"], + " --partition " + testcaseEnv.testcaseArgumentsDict["num_partition"] + " >> ", + testcaseEnv.testCaseBaseDir + "/logs/create_target_cluster_topic.log'"] + cmdStr = " ".join(cmdList) + logger.debug("executing command: [" + cmdStr + "]", extra=d) + subproc = system_test_utils.sys_call_return_subproc(cmdStr) + def get_message_id(logPathName): logLines = open(logPathName, "r").readlines() messageIdList = [] @@ 
-735,52 +993,88 @@ return messageIdList +def get_message_checksum(logPathName): + logLines = open(logPathName, "r").readlines() + messageChecksumList = [] + for line in logLines: + if not "checksum:" in line: + continue + else: + matchObj = re.match('.*checksum:(\d+).*', line) + if matchObj is not None: + messageChecksumList.append( matchObj.group(1) ) + else: + logger.error("unexpected log line : " + line, extra=d) + + return messageChecksumList + + def validate_data_matched(systemTestEnv, testcaseEnv): validationStatusDict = testcaseEnv.validationStatusDict clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList - producerEntityId = system_test_utils.get_data_by_lookup_keyval( \ - clusterEntityConfigDictList, "role", "producer_performance", "entity_id") - consumerEntityId = system_test_utils.get_data_by_lookup_keyval( \ + prodPerfCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "producer_performance") + consumerCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "console_consumer") + + for prodPerfCfg in prodPerfCfgList: + producerEntityId = prodPerfCfg["entity_id"] + topic = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", producerEntityId, "topic") + + consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts( \ clusterEntityConfigDictList, "role", "console_consumer", "entity_id") - msgIdMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( \ - testcaseEnv, "console_consumer", consumerEntityId, "default") + \ - "/msg_id_missing_in_consumer.log" - producerMsgIdList = get_message_id(testcaseEnv.userDefinedEnvVarDict["producerLogPathName"]) - consumerMsgIdList = get_message_id(testcaseEnv.userDefinedEnvVarDict["consumerLogPathName"]) - producerMsgIdSet = set(producerMsgIdList) - consumerMsgIdSet = set(consumerMsgIdList) + matchingConsumerEntityId = None + for consumerEntityId in consumerEntityIdList: + consumerTopic = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", consumerEntityId, "topic") + if consumerTopic in topic: + matchingConsumerEntityId = consumerEntityId + break - missingMsgIdInConsumer = producerMsgIdSet - consumerMsgIdSet + if matchingConsumerEntityId is None: + break - outfile = open(msgIdMissingInConsumerLogPathName, "w") - for id in missingMsgIdInConsumer: - outfile.write(id + "\n") - outfile.close() + msgIdMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( \ + testcaseEnv, "console_consumer", matchingConsumerEntityId, "default") + "/msg_id_missing_in_consumer.log" + producerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "producer_performance", producerEntityId, "default") + producerLogPathName = producerLogPath + "/producer_performance.log" - logger.info("no. of unique messages sent from publisher : " + str(len(producerMsgIdSet)), extra=d) - logger.info("no. of unique messages received by consumer : " + str(len(consumerMsgIdSet)), extra=d) - validationStatusDict["Unique messages from producer"] = str(len(producerMsgIdSet)) - validationStatusDict["Unique messages from consumer"] = str(len(consumerMsgIdSet)) + consumerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "console_consumer", matchingConsumerEntityId, "default") + consumerLogPathName = consumerLogPath + "/console_consumer.log" - if ( len(missingMsgIdInConsumer) == 0 and len(producerMsgIdSet) > 0 ): - validationStatusDict["Validate for data matched"] = "PASSED" - return True - else: - validationStatusDict["Validate for data matched"] = "FAILED" - logger.info("See " + msgIdMissingInConsumerLogPathName + " for missing MessageID", extra=d) - return False - + producerMsgIdList = get_message_id(producerLogPathName) + consumerMsgIdList = get_message_id(consumerLogPathName) + producerMsgIdSet = set(producerMsgIdList) + consumerMsgIdSet = set(consumerMsgIdList) + missingMsgIdInConsumer = producerMsgIdSet - consumerMsgIdSet + + outfile = open(msgIdMissingInConsumerLogPathName, "w") + for id in missingMsgIdInConsumer: + outfile.write(id + "\n") + outfile.close() + + logger.info("no. of unique messages on topic [" + topic + "] sent from producer : " + str(len(producerMsgIdSet)), extra=d) + logger.info("no. of unique messages on topic [" + topic + "] received by consumer : " + str(len(consumerMsgIdSet)), extra=d) + validationStatusDict["Unique messages from producer on [" + topic + "]"] = str(len(producerMsgIdSet)) + validationStatusDict["Unique messages from consumer on [" + topic + "]"] = str(len(consumerMsgIdSet)) + + if ( len(missingMsgIdInConsumer) == 0 and len(producerMsgIdSet) > 0 ): + validationStatusDict["Validate for data matched on topic [" + topic + "]"] = "PASSED" + #return True + else: + validationStatusDict["Validate for data matched on topic [" + topic + "]"] = "FAILED" + logger.info("See " + msgIdMissingInConsumerLogPathName + " for missing MessageID", extra=d) + #return False + +
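validate_data_matched above reduces message-loss detection to a set difference over producer and consumer message ids, now computed per topic. A toy example of the core check (the id values are made up):

producerMsgIdSet = set(["0", "1", "2", "3"])
consumerMsgIdSet = set(["0", "1", "3"])
missingMsgIdInConsumer = producerMsgIdSet - consumerMsgIdSet
print missingMsgIdInConsumer   # set(['2']) => message id 2 never reached the consumer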
def validate_leader_election_successful(testcaseEnv, leaderDict, validationStatusDict): if ( len(leaderDict) > 0 ): try: leaderBrokerId = leaderDict["brokerid"] leaderEntityId = leaderDict["entity_id"] - leaderPid = testcaseEnv.entityParentPidDict[leaderEntityId] + leaderPid = testcaseEnv.entityBrokerParentPidDict[leaderEntityId] hostname = leaderDict["hostname"] logger.info("found leader in entity [" + leaderEntityId + "] with brokerid [" + \ @@ -888,8 +1182,7 @@ system_test_utils.sys_call(cmdStr) - -def get_reelection_latency(systemTestEnv, testcaseEnv, leaderDict): +def get_reelection_latency(systemTestEnv, testcaseEnv, leaderDict, leaderAttributesDict): leaderEntityId = None leaderBrokerId = None leaderPPid = None @@ -897,14 +1190,13 @@ if testcaseEnv.validationStatusDict["Validate leader election successful"] == "FAILED": # leader election is not successful - something is wrong => so skip this testcase - #continue return None else: # leader elected => stop leader try: leaderEntityId = leaderDict["entity_id"] leaderBrokerId = leaderDict["brokerid"] - leaderPPid = testcaseEnv.entityParentPidDict[leaderEntityId] + leaderPPid = testcaseEnv.entityBrokerParentPidDict[leaderEntityId] except: logger.info("leader details unavailable", extra=d) raise @@ -912,22 +1204,260 @@ logger.info("stopping leader in entity "+leaderEntityId+" with pid "+leaderPPid, extra=d) stop_remote_entity(systemTestEnv, leaderEntityId, leaderPPid) - logger.info("sleeping for 5s for leader re-election to complete", extra=d)
-            time.sleep(5)
+            logger.info("sleeping for 10s for leader re-election to complete", extra=d)
+            time.sleep(10)

             # get broker shut down completed timestamp
-            shutdownBrokerDict = get_broker_shutdown_log_line(systemTestEnv, testcaseEnv)
-            #print shutdownBrokerDict
+            shutdownBrokerDict = get_broker_shutdown_log_line(systemTestEnv, testcaseEnv, leaderAttributesDict)
             logger.debug("unix timestamp of shut down completed: " + str("{0:.6f}".format(shutdownBrokerDict["timestamp"])), extra=d)

-            logger.debug("looking up new leader", extra=d)
-            leaderDict2 = get_leader_elected_log_line(systemTestEnv, testcaseEnv)
-            #print leaderDict2
+            logger.info("looking up new leader", extra=d)
+
+            leaderDict2 = get_leader_elected_log_line(systemTestEnv, testcaseEnv, leaderAttributesDict)
             logger.debug("unix timestamp of new elected leader: " + str("{0:.6f}".format(leaderDict2["timestamp"])), extra=d)
+
             leaderReElectionLatency = float(leaderDict2["timestamp"]) - float(shutdownBrokerDict["timestamp"])
             logger.info("leader Re-election Latency: " + str(leaderReElectionLatency) + " sec", extra=d)
-            #testcaseEnv.validationStatusDict["Leader Election Latency"] = str("{0:.2f}".format(leaderReElectionLatency * 1000)) + " ms"

             return leaderReElectionLatency

+def validate_broker_log_segment_checksum(systemTestEnv, testcaseEnv):
+    anonLogger.info("================================================")
+    anonLogger.info("validating broker log segment checksums")
+    anonLogger.info("================================================")
+
+    # brokerLogCksumDict -
+    # a dictionary to keep track of the log segment files in each broker; it will look like this:
+    #
+    # {u'broker-1': {'test_1-0/00000000000000000000.log': '91500855',
+    #                'test_1-0/00000000000000010255.log': '1906285795',
+    #                'test_1-1/00000000000000000000.log': '3785861722',
+    #                'test_1-1/00000000000000010322.log': '1731628308'},
+    #  u'broker-2': {'test_1-0/00000000000000000000.log': '91500855',
+    #                'test_1-0/00000000000000010255.log': '1906285795',
+    #                'test_1-1/00000000000000000000.log': '3785861722',
+    #                'test_1-1/00000000000000010322.log': '1731628308'},
+    #  u'broker-3': {'test_1-0/00000000000000000000.log': '91500855',
+    #                'test_1-0/00000000000000010255.log': '1906285795',
+    #                'test_1-1/00000000000000000000.log': '3431356313'}}
+    brokerLogCksumDict = {}
+
+    clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList
+    brokerEntityIdList = system_test_utils.get_data_from_list_of_dicts(clusterEntityConfigDictList, "role", "broker", "entity_id")
+
+    # access all brokers' hosts to get each broker id and its corresponding log
+    # segment file checksums and populate brokerLogCksumDict
+    for brokerEntityId in brokerEntityIdList:
+        logCksumDict = {}
+
+        hostname = system_test_utils.get_data_by_lookup_keyval(clusterEntityConfigDictList, "entity_id", brokerEntityId, "hostname")
+        logDir   = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", brokerEntityId, "log.dir")
+
+        # get the log segment file full path name
+        cmdStr = "ssh " + hostname + " \"find " + logDir + " -name '*.log'\" 2> /dev/null"
+        logger.debug("executing command [" + cmdStr + "]", extra=d)
+        subproc = system_test_utils.sys_call_return_subproc(cmdStr)
+        for line in subproc.stdout.readlines():
+            # Need a key to identify each corresponding log segment file in different brokers:
+            # This can be achieved by using part of the full log segment file path as a key
+            # to identify the individual log segment file checksums.
+            # The key can be extracted from the path name, starting from "topic-partition", such as:
+            #   full log segment path name   : /tmp/kafka_server_1_logs/test_1-0/00000000000000010255.log
+            #   part of the path name as key : test_1-0/00000000000000010255.log
+            logSegmentPathName = line.rstrip('\n')
+            substrIndex    = logSegmentPathName.index(logDir) + len(logDir + "/")
+            logSegmentFile = logSegmentPathName[substrIndex:]
+
+            # get log segment file checksum
+            cksumCmdStr = "ssh " + hostname + " \"cksum " + logSegmentPathName + " | cut -f1 -d ' '\" 2> /dev/null"
+            subproc2 = system_test_utils.sys_call_return_subproc(cksumCmdStr)
+            for line2 in subproc2.stdout.readlines():
+                checksum = line2.rstrip('\n')
+                # use logSegmentFile as a key to associate with its checksum
+                logCksumDict[logSegmentFile] = checksum
+
+        # associate this logCksumDict with its broker id
+        brokerLogCksumDict["broker-"+brokerEntityId] = logCksumDict
+
+    # use a list of sets for checksums comparison
+    sets = []
+    for brokerId, logCksumDict in brokerLogCksumDict.items():
+        sets.append( set(logCksumDict.items()) )
+
+    # loop through the sets and compare broker[n] against broker[n+1] ...
+    idx = 0
+    diffItemSet = None
+    while idx < len(sets) - 1:
+        diffItemSet = sets[idx] ^ sets[idx + 1]
+
+        if (len(diffItemSet) > 0):
+            logger.error("Mismatch found : " + str(diffItemSet), extra=d)
+            testcaseEnv.validationStatusDict["Log segment checksum matching across all replicas"] = "FAILED"
+
+            # get the mismatched items' key, i.e. the log segment file name
+            diffItemList = list(diffItemSet)
+            diffItemKeys = []
+            for keyvalSet in diffItemList:
+                keyvalList = list(keyvalSet)
+                diffItemKeys.append(keyvalList[0])
+
+            # mismatch found - so print out the whole log segment file checksum
+            # info with the mismatched checksums highlighted
+            for brokerId in sorted(brokerLogCksumDict.iterkeys()):
+                logCksumDict = brokerLogCksumDict[brokerId]
+                print brokerId,":"
+                for logSegmentFile in sorted(logCksumDict.iterkeys()):
+                    checksum = logCksumDict[logSegmentFile]
+                    sys.stdout.write(logSegmentFile + " => " + checksum)
+                    if logSegmentFile in diffItemKeys:
+                        sys.stdout.write(" <<<< not matching across all replicas")
+                    print
+                print
+            return
+        idx += 1
+
+    # getting here means all log segment checksums matched
+    testcaseEnv.validationStatusDict["Log segment checksum matching across all replicas"] = "PASSED"
+
+    anonLogger.info("log segment files checksum :")
+    print
+    pprint.pprint(brokerLogCksumDict)
+    print
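[Editor's note: the comparison above is plain set algebra over (log segment file, checksum) pairs - the symmetric difference of two replicas' sets is non-empty exactly when they disagree. A minimal stand-alone sketch of that idea, with made-up broker data rather than the harness's real dictionaries:]

    # Minimal sketch of the cross-replica checksum comparison (sample data only).
    def find_checksum_mismatches(brokerLogCksumDict):
        # one set of (logSegmentFile, checksum) pairs per broker
        sets = [set(ckDict.items()) for ckDict in brokerLogCksumDict.values()]
        mismatches = set()
        for idx in range(len(sets) - 1):
            # a non-empty symmetric difference means adjacent replicas disagree
            mismatches |= sets[idx] ^ sets[idx + 1]
        return mismatches

    sample = {"broker-1": {"test_1-0/00000000000000000000.log": "91500855"},
              "broker-2": {"test_1-0/00000000000000000000.log": "91500855"},
              "broker-3": {"test_1-0/00000000000000000000.log": "3431356313"}}
    print(find_checksum_mismatches(sample))   # the two (file, checksum) pairs that differ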
+def stop_all_remote_running_processes(systemTestEnv, testcaseEnv):
+
+    entityConfigs = systemTestEnv.clusterEntityConfigDictList
+
+    for hostname, producerPPid in testcaseEnv.producerHostParentPidDict.items():
+        producerEntityId = system_test_utils.get_data_by_lookup_keyval(entityConfigs, "hostname", hostname, "entity_id")
+        stop_remote_entity(systemTestEnv, producerEntityId, producerPPid)
+
+    for hostname, consumerPPid in testcaseEnv.consumerHostParentPidDict.items():
+        consumerEntityId = system_test_utils.get_data_by_lookup_keyval(entityConfigs, "hostname", hostname, "entity_id")
+        stop_remote_entity(systemTestEnv, consumerEntityId, consumerPPid)
+
+    for entityId, jmxParentPidList in testcaseEnv.entityJmxParentPidDict.items():
+        for jmxParentPid in jmxParentPidList:
+            stop_remote_entity(systemTestEnv, entityId, jmxParentPid)
+
+    for entityId, mirrorMakerParentPid in testcaseEnv.entityMirrorMakerParentPidDict.items():
+        stop_remote_entity(systemTestEnv, entityId, mirrorMakerParentPid)
+
+    for entityId, brokerParentPid in testcaseEnv.entityBrokerParentPidDict.items():
+        stop_remote_entity(systemTestEnv, entityId, brokerParentPid)
+
+    for entityId, zkParentPid in testcaseEnv.entityZkParentPidDict.items():
+        stop_remote_entity(systemTestEnv, entityId, zkParentPid)
+
+
+def start_migration_tool(systemTestEnv, testcaseEnv):
+    clusterConfigList = systemTestEnv.clusterEntityConfigDictList
+    migrationToolConfigList = system_test_utils.get_dict_from_list_of_dicts(clusterConfigList, "role", "migration_tool")
+
+    migrationToolConfig = migrationToolConfigList[0]
+    host     = migrationToolConfig["hostname"]
+    entityId = migrationToolConfig["entity_id"]
+    jmxPort  = migrationToolConfig["jmx_port"]
+    role     = migrationToolConfig["role"]
+    kafkaHome = system_test_utils.get_data_by_lookup_keyval(clusterConfigList, "entity_id", entityId, "kafka_home")
+    javaHome  = system_test_utils.get_data_by_lookup_keyval(clusterConfigList, "entity_id", entityId, "java_home")
+    jmxPort   = system_test_utils.get_data_by_lookup_keyval(clusterConfigList, "entity_id", entityId, "jmx_port")
+    kafkaRunClassBin = kafkaHome + "/bin/kafka-run-class.sh"
+
+    logger.info("starting kafka migration tool", extra=d)
+    migrationToolLogPath     = get_testcase_config_log_dir_pathname(testcaseEnv, "migration_tool", entityId, "default")
+    migrationToolLogPathName = migrationToolLogPath + "/migration_tool.log"
+    testcaseEnv.userDefinedEnvVarDict["migrationToolLogPathName"] = migrationToolLogPathName
+
+    testcaseConfigsList = testcaseEnv.testcaseConfigsList
+    numProducers   = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "num.producers")
+    numStreams     = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "num.streams")
+    producerConfig = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "producer.config")
+    consumerConfig = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "consumer.config")
+    zkClientJar    = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "zkclient.01.jar")
+    kafka07Jar     = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "kafka.07.jar")
+    whiteList      = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "whitelist")
+    logFile        = system_test_utils.get_data_by_lookup_keyval(testcaseConfigsList, "entity_id", entityId, "log_filename")
+
+    cmdList = ["ssh " + host,
+               "'JAVA_HOME=" + javaHome,
+               "JMX_PORT=" + jmxPort,
+               kafkaRunClassBin + " kafka.tools.KafkaMigrationTool",
+               "--whitelist=" + whiteList,
+               "--num.producers=" + numProducers,
+               "--num.streams=" + numStreams,
+               "--producer.config=" + systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + producerConfig,
+               "--consumer.config=" + systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + consumerConfig,
+               "--zkclient.01.jar=" + systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + zkClientJar,
+               "--kafka.07.jar=" + systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + kafka07Jar,
+               " &> " + migrationToolLogPathName,
+               " & echo pid:$!
> " + migrationToolLogPath + "/entity_" + entityId + "_pid'"] + + cmdStr = " ".join(cmdList) + logger.debug("executing command: [" + cmdStr + "]", extra=d) + system_test_utils.async_sys_call(cmdStr) + +def validate_07_08_migrated_data_matched(systemTestEnv, testcaseEnv): + validationStatusDict = testcaseEnv.validationStatusDict + clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList + + prodPerfCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "producer_performance") + consumerCfgList = system_test_utils.get_dict_from_list_of_dicts(clusterEntityConfigDictList, "role", "console_consumer") + + for prodPerfCfg in prodPerfCfgList: + producerEntityId = prodPerfCfg["entity_id"] + topic = system_test_utils.get_data_by_lookup_keyval(testcaseEnv.testcaseConfigsList, "entity_id", producerEntityId, "topic") + + consumerEntityIdList = system_test_utils.get_data_from_list_of_dicts( + clusterEntityConfigDictList, "role", "console_consumer", "entity_id") + + matchingConsumerEntityId = None + for consumerEntityId in consumerEntityIdList: + consumerTopic = system_test_utils.get_data_by_lookup_keyval( + testcaseEnv.testcaseConfigsList, "entity_id", consumerEntityId, "topic") + if consumerTopic in topic: + matchingConsumerEntityId = consumerEntityId + break + + if matchingConsumerEntityId is None: + break + + msgChecksumMissingInConsumerLogPathName = get_testcase_config_log_dir_pathname( + testcaseEnv, "console_consumer", matchingConsumerEntityId, "default") \ + + "/msg_checksum_missing_in_consumer.log" + producerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "producer_performance", producerEntityId, "default") + producerLogPathName = producerLogPath + "/producer_performance.log" + + consumerLogPath = get_testcase_config_log_dir_pathname(testcaseEnv, "console_consumer", matchingConsumerEntityId, "default") + consumerLogPathName = consumerLogPath + "/console_consumer.log" + + producerMsgChecksumList = get_message_checksum(producerLogPathName) + consumerMsgChecksumList = get_message_checksum(consumerLogPathName) + producerMsgChecksumSet = set(producerMsgChecksumList) + consumerMsgChecksumSet = set(consumerMsgChecksumList) + + missingMsgChecksumInConsumer = producerMsgChecksumSet - consumerMsgChecksumSet + + outfile = open(msgChecksumMissingInConsumerLogPathName, "w") + for id in missingMsgChecksumInConsumer: + outfile.write(id + "\n") + outfile.close() + + logger.info("no. of unique messages on topic [" + topic + "] sent from publisher : " + str(len(producerMsgChecksumList)), extra=d) + logger.info("no. 
of unique messages on topic [" + topic + "] received by consumer : " + str(len(consumerMsgChecksumList)), extra=d)
+        validationStatusDict["Unique messages from producer on [" + topic + "]"] = str(len(producerMsgChecksumList))
+        validationStatusDict["Unique messages from consumer on [" + topic + "]"] = str(len(consumerMsgChecksumList))
+
+        if ( len(missingMsgChecksumInConsumer) == 0 and len(producerMsgChecksumList) > 0 ):
+            validationStatusDict["Validate for data matched on topic [" + topic + "]"] = "PASSED"
+            #return True
+        else:
+            validationStatusDict["Validate for data matched on topic [" + topic + "]"] = "FAILED"
+            logger.info("See " + msgChecksumMissingInConsumerLogPathName + " for the missing message checksums", extra=d)
+            #return False
+
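[Editor's note: this checksum-based check and the MessageID-based validate_data_matched earlier follow the same recipe - collect what the producer logged, collect what the consumer logged, and treat the set difference as lost messages. A stripped-down sketch of that recipe; the "MessageID:" log-line token and the file names are illustrative assumptions, not the harness's exact format:]

    # Stand-alone sketch of the producer/consumer set-difference validation.
    # extract_ids is a stand-in for helpers like get_message_id / get_message_checksum.
    def extract_ids(logPathName):
        ids = []
        for line in open(logPathName):
            if "MessageID:" in line:                       # assumed log-line format
                ids.append(line.split("MessageID:")[1].strip())
        return ids

    producerMsgIdSet = set(extract_ids("producer_performance.log"))
    consumerMsgIdSet = set(extract_ids("console_consumer.log"))
    missing = producerMsgIdSet - consumerMsgIdSet          # produced but never consumed
    status = "PASSED" if (len(missing) == 0 and len(producerMsgIdSet) > 0) else "FAILED"
    print(status)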
Index: system_test/utils/setup_utils.py
===================================================================
--- system_test/utils/setup_utils.py	(revision 1396332)
+++ system_test/utils/setup_utils.py	(working copy)
@@ -16,11 +16,16 @@
 # under the License.
 #!/usr/bin/env python

+# =================================================================
+# setup_utils.py
+# - This module provides some basic helper functions.
+# =================================================================
+
 import logging
 import kafka_system_test_utils
 import sys

-class SetupUtils():
+class SetupUtils(object):

     # dict to pass user-defined attributes to logger argument: "extra"
     # to use: just update "thisClassName" to the appropriate value
@@ -32,9 +37,8 @@

     def __init__(self):
         d = {'name_of_class': self.__class__.__name__}
-        self.logger.info("constructor", extra=SetUpUtils.d)
+        self.logger.debug("#### constructor inside SetupUtils", extra=self.d)

-
     def log_message(self, message):
         print
         self.anonLogger.info("======================================================")
Index: system_test/utils/system_test_utils.py
===================================================================
--- system_test/utils/system_test_utils.py	(revision 1396332)
+++ system_test/utils/system_test_utils.py	(working copy)
@@ -20,6 +20,8 @@
 # system_test_utils.py
 # ===================================

+import copy
+import hashlib
 import inspect
 import json
 import logging
@@ -31,7 +32,8 @@
 import sys
 import time

-logger = logging.getLogger("namedLogger")
+logger  = logging.getLogger("namedLogger")
+aLogger = logging.getLogger("anonymousLogger")
 thisClassName = '(system_test_utils)'
 d = {'name_of_class': thisClassName}
@@ -319,6 +321,14 @@

 def setup_remote_hosts(systemTestEnv):
+    # sanity check on remote hosts to make sure:
+    # - all directories (e.g. java_home) specified in cluster_config.json exist in all hosts
+    # - no conflicting running processes in remote hosts
+
+    aLogger.info("=================================================")
+    aLogger.info("setting up remote hosts ...")
+    aLogger.info("=================================================")
+
     clusterEntityConfigDictList = systemTestEnv.clusterEntityConfigDictList

     localKafkaHome = os.path.abspath(systemTestEnv.SYSTEM_TEST_BASE_DIR + "/..")
@@ -353,19 +363,19 @@
         kafkaHome = clusterEntityConfigDict["kafka_home"]
         javaHome  = clusterEntityConfigDict["java_home"]

-        logger.info("checking java binary [" + localJavaBin + "] in host [" + hostname + "]", extra=d)
+        logger.debug("checking java binary [" + localJavaBin + "] in host [" + hostname + "]", extra=d)
         if not remote_host_directory_exists(hostname, javaHome):
             logger.error("Directory not found: [" + javaHome + "] in host [" + hostname + "]", extra=d)
             return False

-        logger.info("checking directory [" + kafkaHome + "] in host [" + hostname + "]", extra=d)
+        logger.debug("checking directory [" + kafkaHome + "] in host [" + hostname + "]", extra=d)
         if not remote_host_directory_exists(hostname, kafkaHome):
             logger.info("Directory not found: [" + kafkaHome + "] in host [" + hostname + "]", extra=d)
             if hostname == "localhost":
                 return False
             else:
                 localKafkaSourcePath = systemTestEnv.SYSTEM_TEST_BASE_DIR + "/.."
-                logger.info("copying local copy of [" + localKafkaSourcePath + "] to " + hostname + ":" + kafkaHome, extra=d)
+                logger.debug("copying local copy of [" + localKafkaSourcePath + "] to " + hostname + ":" + kafkaHome, extra=d)
                 copy_source_to_remote_hosts(hostname, localKafkaSourcePath, kafkaHome)

     return True
@@ -385,16 +395,136 @@
         if remote_host_file_exists(hostname, kafkaHome + "/bin/kafka-run-class.sh"):
             cmdStr = "ssh " + hostname + " 'chmod -R 777 " + kafkaHome + "'"
             logger.info("executing command [" + cmdStr + "]", extra=d)
-            system_test_utils.sys_call(cmdStr)
+            sys_call(cmdStr)

             cmdStr = "ssh " + hostname + " 'rm -rf " + kafkaHome + "'"
             logger.info("executing command [" + cmdStr + "]", extra=d)
-            #system_test_utils.sys_call(cmdStr)
+            #sys_call(cmdStr)
         else:
             logger.warn("possible destructive command [" + cmdStr + "]", extra=d)
             logger.warn("check config file: system_test/cluster_config.properties", extra=d)
             logger.warn("aborting test...", extra=d)
             sys.exit(1)

+def get_md5_for_file(filePathName, blockSize=8192):
+    md5 = hashlib.md5()
+    f = open(filePathName, 'rb')
+    while True:
+        data = f.read(blockSize)
+        if not data:
+            break
+        md5.update(data)
+    f.close()
+    return md5.digest()

+def load_cluster_config(clusterConfigPathName, clusterEntityConfigDictList):
+    # empty the list
+    clusterEntityConfigDictList[:] = []
+
+    # retrieve each entity's data from the cluster config json file
+    # as a "dict" and enter them into a "list"
+    jsonFileContent = open(clusterConfigPathName, "r").read()
+    jsonData = json.loads(jsonFileContent)
+    for key, cfgList in jsonData.items():
+        if key == "cluster_config":
+            for cfg in cfgList:
+                clusterEntityConfigDictList.append(cfg)
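[Editor's note: load_cluster_config deliberately mutates the caller's list in place (the clusterEntityConfigDictList[:] = [] idiom) so that every module holding a reference to that same list sees the reload. A usage sketch against a hypothetical two-entity config file, assuming load_cluster_config is in scope:]

    # Usage sketch for load_cluster_config; the file content below is hypothetical.
    import json
    exampleCfg = {"cluster_config": [
        {"entity_id": "0", "role": "zookeeper", "hostname": "localhost"},
        {"entity_id": "1", "role": "broker",    "hostname": "localhost"}]}
    open("/tmp/example_cluster_config.json", "w").write(json.dumps(exampleCfg))

    configList = []
    load_cluster_config("/tmp/example_cluster_config.json", configList)
    # filter entities by role, as the harness does via get_data_from_list_of_dicts
    print([cfg["entity_id"] for cfg in configList if cfg["role"] == "broker"])   # ['1']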
+def setup_remote_hosts_with_testcase_level_cluster_config(systemTestEnv, testCasePathName):
+    # =======================================================================
+    # starting a new testcase, check for a local cluster_config.json
+    # =======================================================================
+    # 1. if there is a xxxx_testsuite/testcase_xxxx/cluster_config.json
+    #    => load it into systemTestEnv.clusterEntityConfigDictList
+    # 2. if there is NO testcase_xxxx/cluster_config.json but there is a xxxx_testsuite/cluster_config.json
+    #    => restore systemTestEnv.clusterEntityConfigDictListLastFoundInTestSuite
+    # 3. if there is NO testcase_xxxx/cluster_config.json NOR xxxx_testsuite/cluster_config.json
+    #    => restore system_test/cluster_config.json
+
+    testCaseLevelClusterConfigPathName = testCasePathName + "/cluster_config.json"
+
+    if os.path.isfile(testCaseLevelClusterConfigPathName):
+        # if there is a cluster_config.json in this directory, load it and use it for this testcase
+        logger.info("found a new cluster_config : " + testCaseLevelClusterConfigPathName, extra=d)
+
+        # empty the current cluster config list
+        systemTestEnv.clusterEntityConfigDictList[:] = []
+
+        # load the cluster config for this testcase level
+        load_cluster_config(testCaseLevelClusterConfigPathName, systemTestEnv.clusterEntityConfigDictList)
+
+        # back up this testcase level cluster config
+        systemTestEnv.clusterEntityConfigDictListLastFoundInTestCase = copy.deepcopy(systemTestEnv.clusterEntityConfigDictList)
+
+    elif len(systemTestEnv.clusterEntityConfigDictListLastFoundInTestSuite) > 0:
+        # if there is NO testcase_xxxx/cluster_config.json, but there is a xxxx_testsuite/cluster_config.json
+        # => restore the config in xxxx_testsuite/cluster_config.json
+
+        # empty the current cluster config list
+        systemTestEnv.clusterEntityConfigDictList[:] = []
+
+        # restore the xxxx_testsuite/cluster_config.json
+        systemTestEnv.clusterEntityConfigDictList = copy.deepcopy(systemTestEnv.clusterEntityConfigDictListLastFoundInTestSuite)
+
+    else:
+        # if there is NONE, restore the config in system_test/cluster_config.json
+
+        # empty the current cluster config list
+        systemTestEnv.clusterEntityConfigDictList[:] = []
+
+        # restore the system_test/cluster_config.json
+        systemTestEnv.clusterEntityConfigDictList = copy.deepcopy(systemTestEnv.clusterEntityConfigDictListInSystemTestLevel)
+
+    # set up remote hosts
+    if not setup_remote_hosts(systemTestEnv):
+        logger.error("Remote hosts sanity check failed. Aborting test ...", extra=d)
+        print
+        sys.exit(1)
+    print
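[Editor's note: this function and its testsuite-level sibling below implement a three-level precedence - a testcase-level cluster_config.json beats the testsuite-level one, which beats system_test/cluster_config.json - with copy.deepcopy protecting each saved snapshot from later in-place edits. The precedence alone, reduced to a sketch with illustrative parameter names:]

    # Sketch of the three-level cluster-config fallback implemented above.
    import copy, json, os

    def resolve_cluster_config(systemLevelList, suiteLevelList, testCasePathName):
        casePathName = testCasePathName + "/cluster_config.json"
        if os.path.isfile(casePathName):
            # testcase-level config wins
            return json.loads(open(casePathName, "r").read())["cluster_config"]
        if len(suiteLevelList) > 0:
            # otherwise fall back to the last testsuite-level config ...
            return copy.deepcopy(suiteLevelList)
        # ... and finally to the system-level default
        return copy.deepcopy(systemLevelList)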
+def setup_remote_hosts_with_testsuite_level_cluster_config(systemTestEnv, testModulePathName):
+    # =======================================================================
+    # starting a new testsuite, check for a local cluster_config.json:
+    # =======================================================================
+    # 1. if there is a xxxx_testsuite/cluster_config.json
+    #    => load it into systemTestEnv.clusterEntityConfigDictList
+    # 2. if there is NO xxxx_testsuite/cluster_config.json
+    #    => restore system_test/cluster_config.json
+
+    testSuiteLevelClusterConfigPathName = testModulePathName + "/cluster_config.json"
+
+    if os.path.isfile(testSuiteLevelClusterConfigPathName):
+        # if there is a cluster_config.json in this directory, load it and use it for this testsuite
+        logger.info("found a new cluster_config : " + testSuiteLevelClusterConfigPathName, extra=d)
+
+        # empty the current cluster config list
+        systemTestEnv.clusterEntityConfigDictList[:] = []
+
+        # load the cluster config for this testsuite level
+        load_cluster_config(testSuiteLevelClusterConfigPathName, systemTestEnv.clusterEntityConfigDictList)
+
+        # back up this testsuite level cluster config
+        systemTestEnv.clusterEntityConfigDictListLastFoundInTestSuite = copy.deepcopy(systemTestEnv.clusterEntityConfigDictList)
+
+    else:
+        # if there is NONE, restore the config in system_test/cluster_config.json
+
+        # empty the last testsuite level cluster config list
+        systemTestEnv.clusterEntityConfigDictListLastFoundInTestSuite[:] = []
+
+        # empty the current cluster config list
+        systemTestEnv.clusterEntityConfigDictList[:] = []
+
+        # restore the system_test/cluster_config.json
+        systemTestEnv.clusterEntityConfigDictList = copy.deepcopy(systemTestEnv.clusterEntityConfigDictListInSystemTestLevel)
+
+    # set up remote hosts
+    if not setup_remote_hosts(systemTestEnv):
+        logger.error("Remote hosts sanity check failed. Aborting test ...", extra=d)
+        print
+        sys.exit(1)
+    print
+
+
Index: system_test/utils/testcase_env.py
===================================================================
--- system_test/utils/testcase_env.py	(revision 1396332)
+++ system_test/utils/testcase_env.py	(working copy)
@@ -25,18 +25,32 @@
 import sys
 import thread

+import system_test_utils
+
 class TestcaseEnv():

     # ================================
     # Generic testcase environment
     # ================================

-    # dictionary of entity_id to ppid for entities such as zookeepers & brokers
+    # dictionary of entity_id to ppid for Zookeeper entities
     # key: entity_id
-    # val: ppid of zk or broker associated to that entity_id
+    # val: ppid of Zookeeper associated to that entity_id
     # { 0: 12345, 1: 12389, ... }
-    entityParentPidDict = {}
+    entityZkParentPidDict = {}

+    # dictionary of entity_id to ppid for broker entities
+    # key: entity_id
+    # val: ppid of broker associated to that entity_id
+    # { 0: 12345, 1: 12389, ... }
+    entityBrokerParentPidDict = {}
+
+    # dictionary of entity_id to ppid for mirror-maker entities
+    # key: entity_id
+    # val: ppid of mirror maker associated to that entity_id
+    # { 0: 12345, 1: 12389, ... }
+    entityMirrorMakerParentPidDict = {}

     # dictionary of entity_id to list of JMX ppid
     # key: entity_id
     # val: list of JMX ppid associated to that entity_id
@@ -67,8 +81,8 @@

         # gather the test case related info and add to a SystemTestEnv object
         self.testcaseResultsDict = {}
-        self.testcaseResultsDict["test_class_name"] = classInstance.__class__.__name__
-        self.testcaseResultsDict["test_case_name"] = ""
+        self.testcaseResultsDict["_test_class_name"] = classInstance.__class__.__name__
+        self.testcaseResultsDict["_test_case_name"] = ""
         self.validationStatusDict = {}
         self.testcaseResultsDict["validation_status"] = self.validationStatusDict
         self.systemTestEnv.systemTestResultsList.append(self.testcaseResultsDict)
@@ -84,6 +98,8 @@
         self.testCaseBaseDir = ""
         self.testCaseLogsDir = ""
         self.testCaseDashboardsDir = ""
+        self.testcasePropJsonPathName = ""
+        self.testcaseNonEntityDataDict = {}

         # ================================
         # dictionary to keep track of
@@ -103,4 +119,39 @@
         # Lock object for producer threads synchronization
         self.lock = thread.allocate_lock()

+        self.numProducerThreadsRunning = 0

+    def initWithKnownTestCasePathName(self, testCasePathName):
+        testcaseDirName = os.path.basename(testCasePathName)
+        self.testcaseResultsDict["_test_case_name"] = testcaseDirName
+        self.testCaseBaseDir = testCasePathName
+        self.testCaseLogsDir = self.testCaseBaseDir + "/logs"
+        self.testCaseDashboardsDir = self.testCaseBaseDir + "/dashboards"
+
+        # find testcase properties json file
+        self.testcasePropJsonPathName = system_test_utils.get_testcase_prop_json_pathname(testCasePathName)
+
+        # get the dictionary that contains the testcase arguments and description
+        self.testcaseNonEntityDataDict = system_test_utils.get_json_dict_data(self.testcasePropJsonPathName)
+
+    def printTestCaseDescription(self, testcaseDirName):
+        testcaseDescription = ""
+        for k,v in self.testcaseNonEntityDataDict.items():
+            if ( k == "description" ):
+                testcaseDescription = v
+
+        print "\n"
+        print "======================================================================================="
+        print "Test Case Name :", testcaseDirName
+        print "======================================================================================="
+        print "Description :"
+        for step in sorted(testcaseDescription.iterkeys()):
+            print "   ", step, ":", testcaseDescription[step]
+        print "======================================================================================="
+        print "Test Case Args :"
+        for k,v in self.testcaseArgumentsDict.items():
+            print "   ", k, " : ", v
+            self.testcaseResultsDict["arg : " + k] = v
+        print "======================================================================================="
+
+
Index: system_test/utils/metrics.py
===================================================================
--- system_test/utils/metrics.py	(revision 1396332)
+++ system_test/utils/metrics.py	(working copy)
@@ -239,7 +239,7 @@
     system_test_utils.async_sys_call(startMetricsCommand)
     time.sleep(1)

-    pidCmdStr = "ssh " + jmxHost + " 'cat " + entityMetricsDir + "/entity_pid 2> /dev/null'"
+    pidCmdStr = "ssh " + jmxHost + " 'cat " + entityMetricsDir + "/entity_pid' 2> /dev/null"
     logger.debug("executing command: [" + pidCmdStr + "]", extra=d)
     subproc = system_test_utils.sys_call_return_subproc(pidCmdStr)
Index: system_test/mirror_maker_testsuite/mirror_maker_test.py
===================================================================
--- system_test/mirror_maker_testsuite/mirror_maker_test.py	(revision 0)
+++
system_test/mirror_maker_testsuite/mirror_maker_test.py (revision 0) @@ -0,0 +1,325 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +#!/usr/bin/env python + +# =================================== +# mirror_maker_test.py +# =================================== + +import inspect +import logging +import os +import signal +import subprocess +import sys +import time +import traceback + +from system_test_env import SystemTestEnv +sys.path.append(SystemTestEnv.SYSTEM_TEST_UTIL_DIR) + +from setup_utils import SetupUtils +from replication_utils import ReplicationUtils +import system_test_utils +from testcase_env import TestcaseEnv + +# product specific: Kafka +import kafka_system_test_utils +import metrics + +class MirrorMakerTest(ReplicationUtils, SetupUtils): + + testModuleAbsPathName = os.path.realpath(__file__) + testSuiteAbsPathName = os.path.abspath(os.path.dirname(testModuleAbsPathName)) + + def __init__(self, systemTestEnv): + + # SystemTestEnv - provides cluster level environment settings + # such as entity_id, hostname, kafka_home, java_home which + # are available in a list of dictionary named + # "clusterEntityConfigDictList" + self.systemTestEnv = systemTestEnv + + super(MirrorMakerTest, self).__init__(self) + + # dict to pass user-defined attributes to logger argument: "extra" + d = {'name_of_class': self.__class__.__name__} + + def signal_handler(self, signal, frame): + self.log_message("Interrupt detected - User pressed Ctrl+c") + + # perform the necessary cleanup here when user presses Ctrl+c and it may be product specific + self.log_message("stopping all entities - please wait ...") + kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv) + sys.exit(1) + + def runTest(self): + + # ====================================================================== + # get all testcase directories under this testsuite + # ====================================================================== + testCasePathNameList = system_test_utils.get_dir_paths_with_prefix( + self.testSuiteAbsPathName, SystemTestEnv.SYSTEM_TEST_CASE_PREFIX) + testCasePathNameList.sort() + + # ============================================================= + # launch each testcase one by one: testcase_1, testcase_2, ... 
+        # =============================================================
+        for testCasePathName in testCasePathNameList:
+
+            skipThisTestCase = False
+
+            try:
+                # ======================================================================
+                # A new instance of TestcaseEnv to keep track of this testcase's env vars
+                # and initialize some env vars as testCasePathName is available now
+                # ======================================================================
+                self.testcaseEnv = TestcaseEnv(self.systemTestEnv, self)
+                self.testcaseEnv.testSuiteBaseDir = self.testSuiteAbsPathName
+                self.testcaseEnv.initWithKnownTestCasePathName(testCasePathName)
+                self.testcaseEnv.testcaseArgumentsDict = self.testcaseEnv.testcaseNonEntityDataDict["testcase_args"]
+
+                # ======================================================================
+                # SKIP if this case is IN testcase_to_skip.json or NOT IN testcase_to_run.json
+                # ======================================================================
+                testcaseDirName = self.testcaseEnv.testcaseResultsDict["_test_case_name"]
+
+                if self.systemTestEnv.printTestDescriptionsOnly:
+                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
+                    continue
+                elif self.systemTestEnv.isTestCaseToSkip(self.__class__.__name__, testcaseDirName):
+                    self.log_message("Skipping : " + testcaseDirName)
+                    skipThisTestCase = True
+                    continue
+                else:
+                    self.testcaseEnv.printTestCaseDescription(testcaseDirName)
+                    system_test_utils.setup_remote_hosts_with_testcase_level_cluster_config(self.systemTestEnv, testCasePathName)
+
+                # ============================================================================== #
+                # ============================================================================== #
+                #               Product Specific Testing Code Starts Here:                       #
+                # ============================================================================== #
+                # ============================================================================== #
+
+                # initialize self.testcaseEnv with user-defined environment variables (product specific)
+                self.testcaseEnv.userDefinedEnvVarDict["zkConnectStr"] = ""
+                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = False
+                self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"] = False
+
+                # initialize signal handler
+                signal.signal(signal.SIGINT, self.signal_handler)
+
+                # create "LOCAL" log directories for metrics, dashboards for each entity under this testcase
+                # for collecting logs from remote machines
+                kafka_system_test_utils.generate_testcase_log_dirs(self.systemTestEnv, self.testcaseEnv)
+
+                # TestcaseEnv.testcaseConfigsList initialized by reading testcase properties file:
+                #   system_test/xxxx_testsuite/testcase_xxxx/testcase_xxxx_properties.json
+                self.testcaseEnv.testcaseConfigsList = system_test_utils.get_json_list_data(
+                    self.testcaseEnv.testcasePropJsonPathName)
+
+                # TestcaseEnv - initialize producer & consumer config / log file pathnames
+                kafka_system_test_utils.init_entity_props(self.systemTestEnv, self.testcaseEnv)
+
+                # clean up data directories specified in zookeeper.properties and kafka_server_xxxx.properties
+                kafka_system_test_utils.cleanup_data_at_remote_hosts(self.systemTestEnv, self.testcaseEnv)
+
+                # generate remote hosts log/config dirs if they do not exist
+                kafka_system_test_utils.generate_testcase_log_dirs_in_remote_hosts(self.systemTestEnv, self.testcaseEnv)
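[Editor's note: nearly every step in this flow addresses a process through the same lookup pattern - scan the list of entity dicts for a matching key/value pair and return another field. A minimal sketch of that pattern with illustrative data; this is not the exact system_test_utils implementation:]

    # Minimal sketch of the entity-lookup pattern used throughout the harness
    # (system_test_utils.get_data_by_lookup_keyval); the data below is illustrative.
    def lookup_keyval(dictList, lookupKey, lookupVal, fieldToRetrieve):
        for entity in dictList:
            if entity.get(lookupKey) == lookupVal:
                return entity[fieldToRetrieve]
        return ""

    entities = [{"entity_id": "4",  "role": "broker",               "hostname": "localhost"},
                {"entity_id": "10", "role": "producer_performance", "hostname": "localhost"}]
    print(lookup_keyval(entities, "entity_id", "4", "hostname"))   # localhost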
+                # generate properties files for zookeeper, kafka, producer, consumer and mirror-maker:
+                # 1. copy system_test/xxxx_testsuite/config/*.properties to
+                #    system_test/xxxx_testsuite/testcase_xxxx/config/
+                # 2. update all properties files in system_test/xxxx_testsuite/testcase_xxxx/config
+                #    by overriding the settings specified in:
+                #    system_test/xxxx_testsuite/testcase_xxxx/testcase_xxxx_properties.json
+                kafka_system_test_utils.generate_overriden_props_files(self.testSuiteAbsPathName,
+                    self.testcaseEnv, self.systemTestEnv)
+
+                # =============================================
+                # preparing all entities to start the test
+                # =============================================
+                self.log_message("starting zookeepers")
+                kafka_system_test_utils.start_zookeepers(self.systemTestEnv, self.testcaseEnv)
+                self.anonLogger.info("sleeping for 2s")
+                time.sleep(2)
+
+                self.log_message("starting brokers")
+                kafka_system_test_utils.start_brokers(self.systemTestEnv, self.testcaseEnv)
+                self.anonLogger.info("sleeping for 5s")
+                time.sleep(5)
+
+                self.log_message("starting mirror makers")
+                kafka_system_test_utils.start_mirror_makers(self.systemTestEnv, self.testcaseEnv)
+                self.anonLogger.info("sleeping for 5s")
+                time.sleep(5)
+
+                #print "#### sleeping for 30 min"
+                #time.sleep(1800)
+
+                self.log_message("creating topics")
+                kafka_system_test_utils.create_topic(self.systemTestEnv, self.testcaseEnv)
+                self.anonLogger.info("sleeping for 5s")
+                time.sleep(5)
+
+                # =============================================
+                # starting producer
+                # =============================================
+                self.log_message("starting producer in the background")
+                kafka_system_test_utils.start_producer_performance(self.systemTestEnv, self.testcaseEnv, False)
+                msgProducingFreeTimeSec = self.testcaseEnv.testcaseArgumentsDict["message_producing_free_time_sec"]
+                self.anonLogger.info("sleeping for " + msgProducingFreeTimeSec + " sec to produce some messages")
+                time.sleep(int(msgProducingFreeTimeSec))
+
+                # =============================================
+                # A while-loop to bounce the leader as specified
+                # by "num_iteration" in testcase_n_properties.json
+                # =============================================
+                i = 1
+                numIterations = int(self.testcaseEnv.testcaseArgumentsDict["num_iteration"])
+                while i <= numIterations:
+
+                    self.log_message("Iteration " + str(i) + " of " + str(numIterations))
+
+                    self.log_message("looking up leader")
+                    leaderDict = kafka_system_test_utils.get_leader_elected_log_line(
+                        self.systemTestEnv, self.testcaseEnv, self.leaderAttributesDict)
+
+                    # ==========================
+                    # leaderDict looks like this:
+                    # ==========================
+                    # {'entity_id': u'3',
+                    #  'partition': '0',
+                    #  'timestamp': 1345050255.8280001,
+                    #  'hostname': u'localhost',
+                    #  'topic': 'test_1',
+                    #  'brokerid': '3'}
+
+                    # =============================================
+                    # validate to see if leader election is successful
+                    # =============================================
+                    self.log_message("validating leader election")
+                    result = kafka_system_test_utils.validate_leader_election_successful(
+                        self.testcaseEnv, leaderDict, self.testcaseEnv.validationStatusDict)
+
+                    # =============================================
+                    # trigger leader re-election by stopping leader
+                    # to get re-election latency
+                    # =============================================
+                    bounceLeaderFlag = self.testcaseEnv.testcaseArgumentsDict["bounce_leader"]
+                    self.log_message("bounce_leader flag : " + bounceLeaderFlag)
+                    if (bounceLeaderFlag.lower() == "true"):
+                        reelectionLatency = kafka_system_test_utils.get_reelection_latency(
+                            self.systemTestEnv, self.testcaseEnv, leaderDict, self.leaderAttributesDict)
+                        latencyKeyName = "Leader Election Latency - iter " + str(i) + " brokerid " + leaderDict["brokerid"]
+                        self.testcaseEnv.validationStatusDict[latencyKeyName] = str("{0:.2f}".format(reelectionLatency * 1000)) + " ms"
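[Editor's note: the latency recorded here is simply the difference between the two log-line timestamps taken inside get_reelection_latency - the broker's "shut down completed" time and the new leader's election time. With illustrative values:]

    # Illustrative arithmetic behind the recorded re-election latency.
    shutdownTimestamp = 1345050255.828000   # broker "shut down completed" log line
    electedTimestamp  = 1345050256.105000   # new "leader elected" log line
    latencySec = electedTimestamp - shutdownTimestamp
    print(str("{0:.2f}".format(latencySec * 1000)) + " ms")   # 277.00 ms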
+
+                    # =============================================
+                    # starting previously terminated broker
+                    # =============================================
+                    if bounceLeaderFlag.lower() == "true":
+                        self.log_message("starting the previously terminated broker")
+                        stoppedLeaderEntityId = leaderDict["entity_id"]
+                        kafka_system_test_utils.start_entity_in_background(self.systemTestEnv, self.testcaseEnv, stoppedLeaderEntityId)
+
+                        self.anonLogger.info("sleeping for 15s")
+                        time.sleep(15)
+                    i += 1
+                # while loop
+
+                # =============================================
+                # tell producer to stop
+                # =============================================
+                self.testcaseEnv.lock.acquire()
+                self.testcaseEnv.userDefinedEnvVarDict["stopBackgroundProducer"] = True
+                time.sleep(1)
+                self.testcaseEnv.lock.release()
+                time.sleep(1)
+
+                # =============================================
+                # wait for producer thread's update of
+                # "backgroundProducerStopped" to be "True"
+                # =============================================
+                while 1:
+                    self.testcaseEnv.lock.acquire()
+                    self.logger.info("status of backgroundProducerStopped : [" + \
+                        str(self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]) + "]", extra=self.d)
+                    if self.testcaseEnv.userDefinedEnvVarDict["backgroundProducerStopped"]:
+                        time.sleep(1)
+                        self.logger.info("all producer threads completed", extra=self.d)
+                        # release the lock before leaving the loop so that
+                        # later acquirers are not blocked forever
+                        self.testcaseEnv.lock.release()
+                        break
+                    time.sleep(1)
+                    self.testcaseEnv.lock.release()
+                    time.sleep(2)
+
+                # =============================================
+                # starting consumer
+                # =============================================
+                self.log_message("starting consumer in the background")
+                kafka_system_test_utils.start_console_consumer(self.systemTestEnv, self.testcaseEnv)
+                self.anonLogger.info("sleeping for 10s")
+                time.sleep(10)
+
+                # =============================================
+                # this testcase is completed - stop all entities
+                # =============================================
+                self.log_message("stopping all entities")
+                for entityId, parentPid in self.testcaseEnv.entityBrokerParentPidDict.items():
+                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)
+
+                for entityId, parentPid in self.testcaseEnv.entityZkParentPidDict.items():
+                    kafka_system_test_utils.stop_remote_entity(self.systemTestEnv, entityId, parentPid)
+
+                # make sure all entities are stopped
+                kafka_system_test_utils.ps_grep_terminate_running_entity(self.systemTestEnv)
+
+                # =============================================
+                # collect logs from remote hosts
+                # =============================================
+                kafka_system_test_utils.collect_logs_from_remote_hosts(self.systemTestEnv, self.testcaseEnv)
+
+                # =============================================
+                # validate the data matched and checksum
+                # =============================================
+                self.log_message("validating data matched")
+                kafka_system_test_utils.validate_data_matched(self.systemTestEnv, self.testcaseEnv)
+                kafka_system_test_utils.validate_broker_log_segment_checksum(self.systemTestEnv, self.testcaseEnv)
+
+                # =============================================
+                # draw graphs
+                # =============================================
+                metrics.draw_all_graphs(self.systemTestEnv.METRICS_PATHNAME,
+                                        self.testcaseEnv,
+                                        self.systemTestEnv.clusterEntityConfigDictList)
+
+                # build dashboard, one for each role
+                metrics.build_all_dashboards(self.systemTestEnv.METRICS_PATHNAME,
+                                             self.testcaseEnv.testCaseDashboardsDir,
self.systemTestEnv.clusterEntityConfigDictList) + + except Exception as e: + self.log_message("Exception while running test {0}".format(e)) + traceback.print_exc() + + finally: + if not skipThisTestCase and not self.systemTestEnv.printTestDescriptionsOnly: + self.log_message("stopping all entities - please wait ...") + kafka_system_test_utils.stop_all_remote_running_processes(self.systemTestEnv, self.testcaseEnv) + Index: system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json =================================================================== --- system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json (revision 0) +++ system_test/mirror_maker_testsuite/testcase_5001/testcase_5001_properties.json (revision 0) @@ -0,0 +1,135 @@ +{ + "description": {"01":"To Test : 'Replication with Mirror Maker'", + "02":"Set up 2 clusters such as : SOURCE => MirrorMaker => TARGET", + "03":"Set up 2-node Zk cluster for both SOURCE & TARGET", + "04":"Produce and consume messages to a single topic - single partition.", + "05":"This test sends messages to 3 replicas", + "06":"At the end it verifies the log size and contents", + "07":"Use a consumer to verify no message loss in TARGET cluster.", + "08":"Producer dimensions : mode:sync, acks:-1, comp:0", + "09":"Log segment size : 10240" + }, + "testcase_args": { + "bounce_leader": "false", + "replica_factor": "3", + "num_partition": "1", + "num_iteration": "1", + "sleep_seconds_between_producer_calls": "1", + "message_producing_free_time_sec": "15", + "num_messages_to_produce_per_producer_call": "50" + }, + "entities": [ + { + "entity_id": "0", + "clientPort": "2108", + "dataDir": "/tmp/zookeeper_0", + "log_filename": "zookeeper_0.log", + "config_filename": "zookeeper_0.properties" + }, + { + "entity_id": "1", + "clientPort": "2118", + "dataDir": "/tmp/zookeeper_1", + "log_filename": "zookeeper_1.log", + "config_filename": "zookeeper_1.properties" + }, + + { + "entity_id": "2", + "clientPort": "2128", + "dataDir": "/tmp/zookeeper_2", + "log_filename": "zookeeper_2.log", + "config_filename": "zookeeper_2.properties" + }, + { + "entity_id": "3", + "clientPort": "2138", + "dataDir": "/tmp/zookeeper_3", + "log_filename": "zookeeper_3.log", + "config_filename": "zookeeper_3.properties" + }, + + { + "entity_id": "4", + "port": "9091", + "brokerid": "1", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_4_logs", + "log_filename": "kafka_server_4.log", + "config_filename": "kafka_server_4.properties" + }, + { + "entity_id": "5", + "port": "9092", + "brokerid": "2", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_5_logs", + "log_filename": "kafka_server_5.log", + "config_filename": "kafka_server_5.properties" + }, + { + "entity_id": "6", + "port": "9093", + "brokerid": "3", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_6_logs", + "log_filename": "kafka_server_6.log", + "config_filename": "kafka_server_6.properties" + }, + { + "entity_id": "7", + "port": "9094", + "brokerid": "4", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_7_logs", + "log_filename": "kafka_server_7.log", + "config_filename": "kafka_server_7.properties" + }, + { + "entity_id": "8", + "port": "9095", + "brokerid": "5", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_8_logs", + "log_filename": "kafka_server_8.log", + "config_filename": "kafka_server_8.properties" + }, + { + "entity_id": "9", + "port": "9096", + "brokerid": "6", + "log.file.size": "10240", + "log.dir": "/tmp/kafka_server_9_logs", + 
"log_filename": "kafka_server_9.log", + "config_filename": "kafka_server_9.properties" + }, + + { + "entity_id": "10", + "topic": "test_1", + "threads": "5", + "compression-codec": "0", + "message-size": "500", + "message": "500", + "request-num-acks": "-1", + "async":"false", + "log_filename": "producer_performance_10.log", + "config_filename": "producer_performance_10.properties" + }, + { + "entity_id": "11", + "topic": "test_1", + "groupid": "mytestgroup", + "consumer-timeout-ms": "10000", + "log_filename": "console_consumer_11.log", + "config_filename": "console_consumer_11.properties" + }, + + { + "entity_id": "12", + "log_filename": "mirror_maker_12.log", + "mirror_consumer_config_filename": "mirror_consumer_12.properties", + "mirror_producer_config_filename": "mirror_producer_12.properties" + } + ] +} Index: system_test/mirror_maker_testsuite/config/producer_performance.properties =================================================================== --- system_test/mirror_maker_testsuite/config/producer_performance.properties (revision 0) +++ system_test/mirror_maker_testsuite/config/producer_performance.properties (revision 0) @@ -0,0 +1,5 @@ +topic=mytest +message-size=100 +thread=5 +compression-codec=0 +request-num-acks=-1 Index: system_test/mirror_maker_testsuite/config/mirror_consumer.properties =================================================================== --- system_test/mirror_maker_testsuite/config/mirror_consumer.properties (revision 0) +++ system_test/mirror_maker_testsuite/config/mirror_consumer.properties (revision 0) @@ -0,0 +1,12 @@ +zk.connect=localhost:2108 +zk.connectiontimeout.ms=1000000 +groupid=mm_regtest_grp +autocommit.interval.ms=120000 +autooffset.reset=smallest +#fetch.size=1048576 +#rebalance.retries.max=4 +#rebalance.backoff.ms=2000 +socket.buffersize=1048576 +fetch.size=1048576 +zk.synctime.ms=15000 +shallowiterator.enable=true Index: system_test/mirror_maker_testsuite/config/server.properties =================================================================== --- system_test/mirror_maker_testsuite/config/server.properties (revision 0) +++ system_test/mirror_maker_testsuite/config/server.properties (revision 0) @@ -0,0 +1,122 @@ +# Licensed to the Apache Software Foundation (ASF) under one or more +# contributor license agreements. See the NOTICE file distributed with +# this work for additional information regarding copyright ownership. +# The ASF licenses this file to You under the Apache License, Version 2.0 +# (the "License"); you may not use this file except in compliance with +# the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# see kafka.server.KafkaConfig for additional details and defaults + +############################# Server Basics ############################# + +# The id of the broker. This must be set to a unique integer for each broker. +brokerid=0 + +# Hostname the broker will advertise to consumers. If not set, kafka will use the value returned +# from InetAddress.getLocalHost(). If there are multiple interfaces getLocalHost +# may not be what you want. 
+#hostname=
+
+
+############################# Socket Server Settings #############################
+
+# The port the socket server listens on
+port=9091
+
+# The number of threads handling network requests
+network.threads=2
+
+# The number of threads doing disk I/O
+io.threads=2
+
+# The send buffer (SO_SNDBUF) used by the socket server
+socket.send.buffer=1048576
+
+# The receive buffer (SO_RCVBUF) used by the socket server
+socket.receive.buffer=1048576
+
+# The maximum size of a request that the socket server will accept (protection against OOM)
+max.socket.request.bytes=104857600
+
+
+############################# Log Basics #############################
+
+# The directory under which to store log files
+log.dir=/tmp/kafka_server_logs
+
+# The number of logical partitions per topic per server. More partitions allow greater parallelism
+# for consumption, but also mean more files.
+num.partitions=5
+
+# Overrides for the default given by num.partitions on a per-topic basis
+#topic.partition.count.map=topic1:3, topic2:4
+
+############################# Log Flush Policy #############################
+
+# The following configurations control the flush of data to disk. This is the most
+# important performance knob in kafka.
+# There are a few important trade-offs here:
+#    1. Durability: Unflushed data is at greater risk of loss in the event of a crash.
+#    2. Latency: Data is not made available to consumers until it is flushed (which adds latency).
+#    3. Throughput: The flush is generally the most expensive operation.
+# The settings below allow one to configure the flush policy to flush data after a period of time or
+# every N messages (or both). This can be done globally and overridden on a per-topic basis.
+
+# The number of messages to accept before forcing a flush of data to disk
+log.flush.interval=10000
+
+# The maximum amount of time a message can sit in a log before we force a flush
+log.default.flush.interval.ms=1000
+
+# Per-topic overrides for log.default.flush.interval.ms
+#topic.flush.intervals.ms=topic1:1000, topic2:3000
+
+# The interval (in ms) at which logs are checked to see if they need to be flushed to disk.
+log.default.flush.scheduler.interval.ms=1000
+
+############################# Log Retention Policy #############################
+
+# The following configurations control the disposal of log segments. The policy can
+# be set to delete segments after a period of time, or after a given size has accumulated.
+# A segment will be deleted whenever *either* of these criteria is met. Deletion always happens
+# from the end of the log.
+
+# The minimum age of a log file to be eligible for deletion
+log.retention.hours=168
+
+# A size-based retention policy for logs. Segments are pruned from the log as long as the remaining
+# segments don't drop below log.retention.size.
+#log.retention.size=1073741824
+
+# The maximum size of a log segment file. When this size is reached a new log segment will be created.
+#log.file.size=536870912
+#log.file.size=102400
+log.file.size=128
+
+# The interval at which log segments are checked to see if they can be deleted according
+# to the retention policies
+log.cleanup.interval.mins=1
+
+############################# Zookeeper #############################
+
+# Enable connecting to zookeeper
+enable.zookeeper=true
+
+# Zk connection string (see zk docs for details).
+# This is a comma-separated list of host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002".
+# You can also append an optional chroot string to the urls to specify the
+# root directory for all kafka znodes.
+zk.connect=localhost:2181
+
+# Timeout in ms for connecting to zookeeper
+zk.connectiontimeout.ms=1000000
+
+monitoring.period.secs=1
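[Editor's note: templates like this server.properties are rewritten per testcase by generate_overriden_props_files, which replaces any key that also appears in the testcase's entity JSON (e.g. port, brokerid, log.file.size in testcase_5001). A simplified sketch of that kind of override merge - not the harness's exact code, and the file names in the usage example are illustrative:]

    # Simplified sketch of a per-testcase property override.
    def override_props(templatePathName, overrides, outPathName):
        outLines = []
        for line in open(templatePathName):
            key = line.split("=")[0].strip()
            if "=" in line and not line.startswith("#") and key in overrides:
                # replace the template value with the testcase-specific one
                outLines.append(key + "=" + overrides[key] + "\n")
            else:
                outLines.append(line)
        open(outPathName, "w").writelines(outLines)

    # e.g. apply one broker's settings from a testcase properties JSON
    override_props("config/server.properties",
                   {"brokerid": "1", "port": "9091", "log.file.size": "10240"},
                   "testcase_5001/config/kafka_server_4.properties")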
Index: system_test/mirror_maker_testsuite/config/consumer.properties
===================================================================
--- system_test/mirror_maker_testsuite/config/consumer.properties	(revision 0)
+++ system_test/mirror_maker_testsuite/config/consumer.properties	(revision 0)
@@ -0,0 +1,29 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# see kafka.consumer.ConsumerConfig for more details
+
+# zk connection string
+# comma separated host:port pairs, each corresponding to a zk
+# server. e.g. "127.0.0.1:3000,127.0.0.1:3001,127.0.0.1:3002"
+zk.connect=127.0.0.1:2181
+
+# timeout in ms for connecting to zookeeper
+zk.connectiontimeout.ms=1000000
+
+#consumer group id
+groupid=test-consumer-group
+
+#consumer timeout
+#consumer.timeout.ms=5000
Index: system_test/mirror_maker_testsuite/config/console_consumer.properties
===================================================================
--- system_test/mirror_maker_testsuite/config/console_consumer.properties	(revision 0)
+++ system_test/mirror_maker_testsuite/config/console_consumer.properties	(revision 0)
@@ -0,0 +1,4 @@
+zookeeper=localhost:2181
+topic=test_1
+from-beginning
+consumer-timeout-ms=10000
Index: system_test/mirror_maker_testsuite/config/mirror_producer.properties
===================================================================
--- system_test/mirror_maker_testsuite/config/mirror_producer.properties	(revision 0)
+++ system_test/mirror_maker_testsuite/config/mirror_producer.properties	(revision 0)
@@ -0,0 +1,5 @@
+producer.type=async
+queue.enqueueTimeout.ms=-1
+broker.list=localhost:9094
+compression.codec=0
+
Index: system_test/mirror_maker_testsuite/config/log4j.properties
===================================================================
--- system_test/mirror_maker_testsuite/config/log4j.properties	(revision 0)
+++ system_test/mirror_maker_testsuite/config/log4j.properties	(revision 0)
@@ -0,0 +1,34 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.
You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+log4j.rootLogger=INFO, stdout
+
+log4j.appender.stdout=org.apache.log4j.ConsoleAppender
+log4j.appender.stdout.layout=org.apache.log4j.PatternLayout
+log4j.appender.stdout.layout.ConversionPattern=[%d] %p %m (%c)%n
+
+#log4j.appender.fileAppender=org.apache.log4j.FileAppender
+#log4j.appender.fileAppender.File=kafka-request.log
+#log4j.appender.fileAppender.layout=org.apache.log4j.PatternLayout
+#log4j.appender.fileAppender.layout.ConversionPattern= %-4r [%t] %-5p %c %x - %m%n
+
+
+# Turn on all our debugging info
+#log4j.logger.kafka=INFO
+#log4j.logger.org.I0Itec.zkclient.ZkClient=DEBUG
+
+# to print message checksum from ProducerPerformance
+log4j.logger.kafka.perf=DEBUG
+log4j.logger.kafka.perf.ProducerPerformance$ProducerThread=DEBUG
+
Index: system_test/mirror_maker_testsuite/config/producer.properties
===================================================================
--- system_test/mirror_maker_testsuite/config/producer.properties	(revision 0)
+++ system_test/mirror_maker_testsuite/config/producer.properties	(revision 0)
@@ -0,0 +1,80 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# see kafka.producer.ProducerConfig for more details
+
+############################# Producer Basics #############################
+
+# need to set either broker.list or zk.connect
+
+# configure brokers statically
+# format: brokerid1:host1:port1,brokerid2:host2:port2 ...
+#broker.list=0:localhost:9092
+
+# discover brokers from ZK
+zk.connect=localhost:2181
+
+# zookeeper session timeout; default is 6000
+#zk.sessiontimeout.ms=
+
+# the max time that the client waits to establish a connection to zookeeper; default is 6000
+#zk.connectiontimeout.ms=
+
+# name of the partitioner class for partitioning events; default partition spreads data randomly
+#partitioner.class=
+
+# specifies whether the messages are sent asynchronously (async) or synchronously (sync)
+producer.type=sync
+
+# specify the compression codec for all data generated: 0: no compression, 1: gzip
+compression.codec=0
+
+# message encoder
+serializer.class=kafka.serializer.StringEncoder
+
+# allow topic level compression
+#compressed.topics=
+
+# max message size; messages larger than that size are discarded; default is 1000000
+#max.message.size=
+
+
+############################# Async Producer #############################
+# maximum time, in milliseconds, for buffering data on the producer queue
+#queue.time=
+
+# the maximum size of the blocking queue for buffering on the producer
+#queue.size=
+
+# Timeout for event enqueue:
+# 0: events will be enqueued immediately or dropped if the queue is full
+# -ve: enqueue will block indefinitely if the queue is full
+# +ve: enqueue will block up to this many milliseconds if the queue is full
+#queue.enqueueTimeout.ms=
+
+# the number of messages batched at the producer
+#batch.size=
+
+# the callback handler for one or multiple events
+#callback.handler=
+
+# properties required to initialize the callback handler
+#callback.handler.props=
+
+# the handler for events
+#event.handler=
+
+# properties required to initialize the event handler
+#event.handler.props=
+
Index: system_test/mirror_maker_testsuite/config/zookeeper.properties
===================================================================
--- system_test/mirror_maker_testsuite/config/zookeeper.properties (revision 0)
+++ system_test/mirror_maker_testsuite/config/zookeeper.properties (revision 0)
@@ -0,0 +1,23 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#    http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+# the directory where the snapshot is stored.
+dataDir=/tmp/zookeeper
+# the port at which the clients will connect
+clientPort=2181
+# disable the per-ip limit on the number of connections since this is a non-production config
+maxClientCnxns=0
+syncLimit=5
+initLimit=10
+tickTime=2000
Index: system_test/mirror_maker_testsuite/__init__.py
===================================================================
--- system_test/mirror_maker_testsuite/__init__.py (revision 0)
+++ system_test/mirror_maker_testsuite/__init__.py (revision 0)
@@ -0,0 +1 @@
+
Index: system_test/mirror_maker_testsuite/cluster_config.json
===================================================================
--- system_test/mirror_maker_testsuite/cluster_config.json (revision 0)
+++ system_test/mirror_maker_testsuite/cluster_config.json (revision 0)
@@ -0,0 +1,126 @@
+{
+    "cluster_config": [
+        {
+            "entity_id": "0",
+            "hostname": "localhost",
+            "role": "zookeeper",
+            "cluster_name":"source",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9100"
+        },
+        {
+            "entity_id": "1",
+            "hostname": "localhost",
+            "role": "zookeeper",
+            "cluster_name":"source",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9101"
+        },
+
+        {
+            "entity_id": "2",
+            "hostname": "localhost",
+            "role": "zookeeper",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9102"
+        },
+        {
+            "entity_id": "3",
+            "hostname": "localhost",
+            "role": "zookeeper",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9103"
+        },
+
+        {
+            "entity_id": "4",
+            "hostname": "localhost",
+            "role": "broker",
+            "cluster_name":"source",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9104"
+        },
+        {
+            "entity_id": "5",
+            "hostname": "localhost",
+            "role": "broker",
+            "cluster_name":"source",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9105"
+        },
+        {
+            "entity_id": "6",
+            "hostname": "localhost",
+            "role": "broker",
+            "cluster_name":"source",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9106"
+        },
+
+        {
+            "entity_id": "7",
+            "hostname": "localhost",
+            "role": "broker",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9107"
+        },
+        {
+            "entity_id": "8",
+            "hostname": "localhost",
+            "role": "broker",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9108"
+        },
+        {
+            "entity_id": "9",
+            "hostname": "localhost",
+            "role": "broker",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9109"
+        },
+
+        {
+            "entity_id": "10",
+            "hostname": "localhost",
+            "role": "producer_performance",
+            "cluster_name":"source",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9110"
+        },
+        {
+            "entity_id": "11",
+            "hostname": "localhost",
+            "role": "console_consumer",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9111"
+        },
+
+        {
+            "entity_id": "12",
+            "hostname": "localhost",
+            "role": "mirror_maker",
+            "cluster_name":"target",
+            "kafka_home": "default",
+            "java_home": "default",
+            "jmx_port": "9112"
+        }
+    ]
+}
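Every entity above carries a cluster_name of either "source" or "target"; this is what lets the mirror-maker suite drive two Kafka clusters, plus the mirror-maker process between them, from a single config file. A minimal sketch of selecting entities per cluster, assuming configs is the parsed "cluster_config" list and the helper name is illustrative rather than part of the framework:

    def entities_for(configs, cluster_name, role):
        # pick the entities of one role within one cluster
        return [cfg for cfg in configs
                if cfg["cluster_name"] == cluster_name and cfg["role"] == role]

    # against the cluster_config.json above:
    #   entities_for(configs, "source", "broker")  -> entity_ids 4, 5, 6
    #   entities_for(configs, "target", "broker")  -> entity_ids 7, 8, 9

Index: system_test/system_test_runner.py
===================================================================
--- system_test/system_test_runner.py (revision 1396332)
+++ system_test/system_test_runner.py (working copy)
@@ -16,111 +16,120 @@
 # under the License.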
 #!/usr/bin/env python
-# ===================================
 # system_test_runner.py
-# ===================================
+#
+# - This script is the test driver for a distributed environment
+#   system testing framework. It is located at the top level of the
+#   framework hierarchy (in this case - system_test/).
+#
+# - This test driver serves as an entry point to launch a series
+#   of test suites (modules) with multiple functionally similar test
+#   cases which can be grouped together.
+#
+# - Please refer to system_test/README.txt for more details on
+#   how to add test suites and test cases.
+#
+# - In most cases, it is not necessary to make any changes to this
+#   script.
+# =================================================================
+from optparse import OptionParser
 from system_test_env import SystemTestEnv
-from utils import system_test_utils
+from utils import system_test_utils
 
-import logging
+import logging.config
 import os
+import pprint
 import sys
 
-# ====================================================================
-# Two logging formats are defined in system_test/system_test_runner.py
-# ====================================================================
+# load the config file for logging
+logging.config.fileConfig('logging.conf')
 
-# 1. "namedLogger" is defined to log message in this format:
-#    "%(asctime)s - %(levelname)s - %(message)s %(name_of_class)s"
-#
-# usage: to log message and showing the class name of the message
+# 'd' is an argument to be merged into the log message (see the Python logging doc).
+# In this case, the corresponding class name can be appended to the end of the
+# logging message to facilitate debugging.
+d = {'name_of_class': '(system_test_runner)'}
 
-namedLogger = logging.getLogger("namedLogger")
-namedLogger.setLevel(logging.INFO)
-#namedLogger.setLevel(logging.DEBUG)
-namedConsoleHandler = logging.StreamHandler()
-namedConsoleHandler.setLevel(logging.DEBUG)
-namedFormatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s %(name_of_class)s")
-namedConsoleHandler.setFormatter(namedFormatter)
-namedLogger.addHandler(namedConsoleHandler)
+def main():
+    nLogger = logging.getLogger('namedLogger')
+    aLogger = logging.getLogger('anonymousLogger')
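The 'd' dict above works through the logging module's 'extra' keyword: any dict passed as extra= is merged into the LogRecord, so its keys become available to the formatter. A standalone sketch of the pattern (the format string matches the namedFormatter entry in logging.conf, added later in this patch; the output line is illustrative):

    import logging

    # the formatter references a custom %(name_of_class)s field ...
    logging.basicConfig(format="%(asctime)s - %(levelname)s - %(message)s %(name_of_class)s")
    logger = logging.getLogger("namedLogger")

    # ... which is supplied per call through the 'extra' dict
    d = {'name_of_class': '(system_test_runner)'}
    logger.warning("Remote hosts sanity check failed", extra=d)
    # -> <timestamp> - WARNING - Remote hosts sanity check failed (system_test_runner)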
"anonymousLogger" is defined to log message in this format: -# "%(asctime)s - %(levelname)s - %(message)s" -# -# usage: to log message without showing class name and it's appropriate -# for logging generic message such as "sleeping for 5 seconds" + optionParser = OptionParser() + optionParser.add_option("-p", "--print-test-descriptions-only", + dest="printTestDescriptionsOnly", + default=False, + action="store_true", + help="print test descriptions only - don't run the test") -anonymousLogger = logging.getLogger("anonymousLogger") -anonymousLogger.setLevel(logging.INFO) -#anonymousLogger.setLevel(logging.DEBUG) -anonymousConsoleHandler = logging.StreamHandler() -anonymousConsoleHandler.setLevel(logging.DEBUG) -anonymousFormatter = logging.Formatter("%(asctime)s - %(levelname)s - %(message)s") -anonymousConsoleHandler.setFormatter(anonymousFormatter) -anonymousLogger.addHandler(anonymousConsoleHandler) + optionParser.add_option("-n", "--do-not-validate-remote-host", + dest="doNotValidateRemoteHost", + default=False, + action="store_true", + help="do not validate remote host (due to different kafka versions are installed)") -d = {'name_of_class': '(system_test_runner)'} + (options, args) = optionParser.parse_args() -def main(): + print "\n" + aLogger.info("=================================================") + aLogger.info(" System Regression Test Framework") + aLogger.info("=================================================") + print "\n" - print - print - print - anonymousLogger.info("=================================================") - anonymousLogger.info(" System Regression Test Framework") - anonymousLogger.info("=================================================") - print - print - testSuiteClassDictList = [] # SystemTestEnv is a class to provide all environement settings for this session # such as the SYSTEM_TEST_BASE_DIR, SYSTEM_TEST_UTIL_DIR, ... systemTestEnv = SystemTestEnv() - # sanity check on remote hosts to make sure: - # - all directories (eg. java_home) specified in cluster_config.json exists in all hosts - # - no conflicting running processes in remote hosts - anonymousLogger.info("=================================================") - anonymousLogger.info("setting up remote hosts ...") - anonymousLogger.info("=================================================") - if not system_test_utils.setup_remote_hosts(systemTestEnv): - namedLogger.error("Remote hosts sanity check failed. Aborting test ...", extra=d) + if options.printTestDescriptionsOnly: + systemTestEnv.printTestDescriptionsOnly = True + if options.doNotValidateRemoteHost: + systemTestEnv.doNotValidateRemoteHost = True + + if not systemTestEnv.printTestDescriptionsOnly: + if not systemTestEnv.doNotValidateRemoteHost: + if not system_test_utils.setup_remote_hosts(systemTestEnv): + nLogger.error("Remote hosts sanity check failed. 
+                print
+                sys.exit(1)
+        else:
+            nLogger.info("SKIPPING : checking remote machines", extra=d)
         print
-        sys.exit(1)
-    print
 
     # get all defined names within a module:
     definedItemList = dir(SystemTestEnv)
-    anonymousLogger.debug("=================================================")
-    anonymousLogger.debug("SystemTestEnv keys:")
+    aLogger.debug("=================================================")
+    aLogger.debug("SystemTestEnv keys:")
     for item in definedItemList:
-        anonymousLogger.debug("    " + item)
-    anonymousLogger.debug("=================================================")
+        aLogger.debug("    " + item)
+    aLogger.debug("=================================================")
 
-    anonymousLogger.info("=================================================")
-    anonymousLogger.info("looking up test suites ...")
-    anonymousLogger.info("=================================================")
+    aLogger.info("=================================================")
+    aLogger.info("looking up test suites ...")
+    aLogger.info("=================================================")
 
     # find all test suites in SYSTEM_TEST_BASE_DIR
     for dirName in os.listdir(systemTestEnv.SYSTEM_TEST_BASE_DIR):
 
         # make sure this is a valid testsuite directory
         if os.path.isdir(dirName) and dirName.endswith(systemTestEnv.SYSTEM_TEST_SUITE_SUFFIX):
-
-            namedLogger.info("found a testsuite : " + dirName, extra=d)
+            print
+            nLogger.info("found a testsuite : " + dirName, extra=d)
             testModulePathName = os.path.abspath(systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + dirName)
 
+            if not systemTestEnv.printTestDescriptionsOnly:
+                system_test_utils.setup_remote_hosts_with_testsuite_level_cluster_config(systemTestEnv, testModulePathName)
+
             # go through all test module files in this testsuite
             for moduleFileName in os.listdir(testModulePathName):
 
                 # make sure it is a valid test module
                 if moduleFileName.endswith(systemTestEnv.SYSTEM_TEST_MODULE_EXT) \
-                    and not moduleFileName.startswith("__"):
+                   and not moduleFileName.startswith("__"):
 
                     # found a test module file
-                    namedLogger.info("found a test module file : " + moduleFileName, extra=d)
+                    nLogger.info("found a test module file : " + moduleFileName, extra=d)
 
                     testModuleClassName = system_test_utils.sys_call("grep ^class " + testModulePathName + "/" + \
                         moduleFileName + " | sed 's/^class //g' | sed 's/(.*):.*//g'")
@@ -134,42 +143,44 @@
                     testSuiteClassDict["class"] = testModuleClassName
                     testSuiteClassDictList.append(testSuiteClassDict)
 
-    # loop through testSuiteClassDictList and start the test class one by one
-    for testSuiteClassDict in testSuiteClassDictList:
+                    suiteName  = testSuiteClassDict["suite"]
+                    moduleName = testSuiteClassDict["module"]
+                    className  = testSuiteClassDict["class"]
 
-        suiteName  = testSuiteClassDict["suite"]
-        moduleName = testSuiteClassDict["module"]
-        className  = testSuiteClassDict["class"]
+                    # add testsuite directory to sys.path such that the module can be loaded
+                    sys.path.append(systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + suiteName)
 
+                    if not systemTestEnv.printTestDescriptionsOnly:
+                        aLogger.info("=================================================")
+                        aLogger.info("Running Test for : ")
+                        aLogger.info("    suite  : " + suiteName)
+                        aLogger.info("    module : " + moduleName)
+                        aLogger.info("    class  : " + className)
+                        aLogger.info("=================================================")
 
-        # add testsuite directory to sys.path such that the module can be loaded
-        sys.path.append(systemTestEnv.SYSTEM_TEST_BASE_DIR + "/" + suiteName)
 
+                    # dynamically loading a module and starting the test class
+                    mod = __import__(moduleName)
+                    theClass = getattr(mod, className)
+                    instance = theClass(systemTestEnv)
+                    instance.runTest()
+                    print
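The discovery loop above never imports a test module by literal name: it greps the class name out of the module file and then loads both dynamically. A standalone sketch of the same __import__/getattr pattern (argument names are illustrative):

    import sys

    def run_test_class(suite_dir, module_name, class_name, env):
        # make the testsuite directory importable
        sys.path.append(suite_dir)
        # import the module by its (string) name; __import__ returns the module object
        mod = __import__(module_name)
        # look up the test class on the module, instantiate it with the
        # shared environment object, and run it
        test_class = getattr(mod, class_name)
        test_class(env).runTest()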
-        anonymousLogger.info("=================================================")
-        anonymousLogger.info("Running Test for : ")
-        anonymousLogger.info("    suite  : " + suiteName)
-        anonymousLogger.info("    module : " + moduleName)
-        anonymousLogger.info("    class  : " + className)
-        anonymousLogger.info("=================================================")
-
-        # dynamically loading a module and starting the test class
-        mod = __import__(moduleName)
-        theClass = getattr(mod, className)
-        instance = theClass(systemTestEnv)
-        instance.runTest()
-
-    print
-    anonymousLogger.info("=================================================")
-    anonymousLogger.info("                 TEST REPORTS")
-    anonymousLogger.info("=================================================")
-    for systemTestResult in systemTestEnv.systemTestResultsList:
-        for key,val in systemTestResult.items():
-            if key == "validation_status":
-                anonymousLogger.info(key + " : ")
-                for validation, status in val.items():
-                    anonymousLogger.info("    " + validation + " : " + status)
-            else:
-                anonymousLogger.info(key + " : " + val)
+    if not systemTestEnv.printTestDescriptionsOnly:
         print
+        print "========================================================"
+        print "                     TEST REPORTS"
+        print "========================================================"
+        for systemTestResult in systemTestEnv.systemTestResultsList:
+            for key in sorted(systemTestResult.iterkeys()):
+                if key == "validation_status":
+                    print key, " : "
+                    for validatedItem in sorted(systemTestResult[key].iterkeys()):
+                        print "    ", validatedItem, " : ", systemTestResult[key][validatedItem]
+                else:
+                    print key, " : ", systemTestResult[key]
+            print
+        print "========================================================"
+        print
 
 # =========================
 # main entry point
Index: system_test/run_sanity.sh
===================================================================
--- system_test/run_sanity.sh (revision 0)
+++ system_test/run_sanity.sh (revision 0)
@@ -0,0 +1,10 @@
+#!/bin/bash
+
+my_ts=`date +"%s"`
+
+cp testcase_to_run.json testcase_to_run.json_${my_ts}
+cp testcase_to_run_sanity.json testcase_to_run.json
+
+python -B system_test_runner.py
+
+
Index: system_test/testcase_to_run.json
===================================================================
--- system_test/testcase_to_run.json (revision 0)
+++ system_test/testcase_to_run.json (revision 0)
@@ -0,0 +1,5 @@
+{
+    "ReplicaBasicTest" : [
+        "testcase_0002"
+    ]
+}
Index: system_test/logging.conf
===================================================================
--- system_test/logging.conf (revision 0)
+++ system_test/logging.conf (revision 0)
@@ -0,0 +1,56 @@
+# ==============================================
+# declaration - must have a 'root' logger
+# ==============================================
+[loggers]
+keys=root,namedLogger,anonymousLogger
+
+[handlers]
+keys=namedConsoleHandler,anonymousConsoleHandler
+
+[formatters]
+keys=namedFormatter,anonymousFormatter
+
+# ==============================================
+# loggers section
+# ==============================================
+[logger_root]
+level=NOTSET
+handlers=
+
+[logger_namedLogger]
+level=DEBUG
+handlers=namedConsoleHandler
+qualname=namedLogger
+propagate=0
+
+[logger_anonymousLogger]
+level=DEBUG
+handlers=anonymousConsoleHandler
+qualname=anonymousLogger
+propagate=0
+
+# ==============================================
+# handlers section
+# ** Change 'level' to INFO/DEBUG in this section
+# ==============================================
+[handler_namedConsoleHandler]
+class=StreamHandler
+level=INFO
+formatter=namedFormatter
+args=[]
+
+[handler_anonymousConsoleHandler]
+class=StreamHandler
+level=INFO
+formatter=anonymousFormatter
+args=[]
+
+# ==============================================
+# formatters section
+# ==============================================
+[formatter_namedFormatter]
+format=%(asctime)s - %(levelname)s - %(message)s %(name_of_class)s
+
+[formatter_anonymousFormatter]
+format=%(asctime)s - %(levelname)s - %(message)s
+
Index: system_test/testcase_to_skip.json
===================================================================
--- system_test/testcase_to_skip.json (revision 0)
+++ system_test/testcase_to_skip.json (revision 0)
@@ -0,0 +1,3 @@
+{
+    "ReplicaBasicTest": [ "testcase_1" ]
+}
Index: system_test/cluster_config.json
===================================================================
--- system_test/cluster_config.json (revision 1396332)
+++ system_test/cluster_config.json (working copy)
@@ -4,6 +4,7 @@
       "entity_id": "0",
       "hostname": "localhost",
       "role": "zookeeper",
+      "cluster_name": "source",
       "kafka_home": "default",
       "java_home": "default",
       "jmx_port": "9990"
@@ -12,6 +13,7 @@
       "entity_id": "1",
       "hostname": "localhost",
       "role": "broker",
+      "cluster_name": "source",
       "kafka_home": "default",
       "java_home": "default",
       "jmx_port": "9991"
@@ -20,6 +22,7 @@
       "entity_id": "2",
       "hostname": "localhost",
       "role": "broker",
+      "cluster_name": "source",
       "kafka_home": "default",
       "java_home": "default",
       "jmx_port": "9992"
@@ -28,6 +31,7 @@
       "entity_id": "3",
       "hostname": "localhost",
       "role": "broker",
+      "cluster_name": "source",
       "kafka_home": "default",
       "java_home": "default",
       "jmx_port": "9993"
@@ -36,17 +40,19 @@
       "entity_id": "4",
       "hostname": "localhost",
       "role": "producer_performance",
+      "cluster_name": "source",
       "kafka_home": "default",
       "java_home": "default",
-      "jmx_port": "9994"
+      "jmx_port": "9997"
     },
     {
       "entity_id": "5",
       "hostname": "localhost",
       "role": "console_consumer",
+      "cluster_name": "source",
       "kafka_home": "default",
       "java_home": "default",
-      "jmx_port": "9995"
+      "jmx_port": "9998"
     }
   ]
 }
Index: core/src/main/scala/kafka/controller/KafkaController.scala
===================================================================
--- core/src/main/scala/kafka/controller/KafkaController.scala (revision 1396332)
+++ core/src/main/scala/kafka/controller/KafkaController.scala (working copy)
@@ -147,8 +147,20 @@
     info("New partition creation callback for %s".format(newPartitions.mkString(",")))
     partitionStateMachine.handleStateChanges(newPartitions, NewPartition)
     partitionStateMachine.handleStateChanges(newPartitions, OnlinePartition)
+    val replicas = getAllReplicasForPartition(newPartitions)
+    replicas.foreach {
+      case(topic, partitionId, replicaId) =>
+        replicaStateMachine.replicaState.put((topic, partitionId, replicaId), OnlineReplica)
+    }
   }
 
+  private def getAllReplicasForPartition(partitions: Seq[(String, Int)]): Seq[Tuple3[String, Int, Int]] = {
+    partitions.map { p =>
+      val replicas = controllerContext.partitionReplicaAssignment(p)
+      replicas.map(r => (p._1, p._2, r))
+    }.flatten
+  }
+
   /* TODO: kafka-330 This API is unused until we introduce the delete topic functionality.
      remove the unneeded leaderAndISRPath that the previous controller didn't get a chance to remove*/
   def onTopicDeletion(topics: Set[String], replicaAssignment: mutable.Map[(String, Int), Seq[Int]]) {
Index: core/src/main/scala/kafka/controller/ReplicaStateMachine.scala
===================================================================
--- core/src/main/scala/kafka/controller/ReplicaStateMachine.scala (revision 1396332)
+++ core/src/main/scala/kafka/controller/ReplicaStateMachine.scala (working copy)
@@ -100,7 +100,7 @@
    * @param replicaId The replica for which the state transition is invoked
    * @param targetState The end state that the replica should be moved to
    */
-  private def handleStateChange(topic: String, partition: Int, replicaId: Int, targetState: ReplicaState) {
+  def handleStateChange(topic: String, partition: Int, replicaId: Int, targetState: ReplicaState) {
     try {
       targetState match {
         case OnlineReplica =>