Details
-
Bug
-
Status: Resolved
-
Major
-
Resolution: Fixed
-
2.2.0
-
None
Description
During an Express Upgrade, all of the tasks for Hive Server and Hive MetaStore run in parallel. If either component is installed on multiple hosts, the tasks run on those hosts at the same time, which can cause errors during the upgrade.
For example:
Multiple Hive Server hosts try to upload the tarballs at the same time.
Traceback (most recent call last): File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server.py", line 203, in <module> HiveServer().execute() File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py", line 219, in execute method(env) File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py", line 524, in restart self.start(env, upgrade_type=upgrade_type) File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server.py", line 84, in start self.configure(env) # FOR SECURITY File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive_server.py", line 55, in configure hive(name='hiveserver2') File "/usr/lib/python2.6/site-packages/ambari_commons/os_family_impl.py", line 89, in thunk return fn(*args, **kwargs) File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive.py", line 177, in hive host_sys_prepped=params.host_sys_prepped) File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/copy_tarball.py", line 201, in copy_to_hdfs replace_existing_files=replace_existing_files, File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line 154, in __init__ self.env.run() File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 158, in run self.run_action(resource, action) File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 121, in run_action provider_action() File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 402, in action_create_on_execute self.action_delayed("create") File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 399, in action_delayed self.get_hdfs_resource_executor().action_delayed(action_name, self) File 
"/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 255, in action_delayed self._create_resource() File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 269, in _create_resource self._create_file(self.main_resource.resource.target, source=self.main_resource.resource.source, mode=self.mode) File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 322, in _create_file self.util.run_command(target, 'CREATE', method='PUT', overwrite=True, assertable_result=False, file_to_put=source, **kwargs) File "/usr/lib/python2.6/site-packages/resource_management/libraries/providers/hdfs_resource.py", line 197, in run_command _, out, err = get_user_call_output(cmd, user=self.run_user, logoutput=self.logoutput, quiet=False) File "/usr/lib/python2.6/site-packages/resource_management/libraries/functions/get_user_call_output.py", line 61, in get_user_call_output raise Fail(err_msg) resource_management.core.exceptions.Fail: Execution of 'curl -sS -L -w '%{http_code}' -X PUT -T /usr/hdp/2.3.4.0-3485/sqoop/sqoop.tar.gz --negotiate -u : 'http://os-r7-ueajas-daltom20sechanrmlevel-1.novalocal:50070/webhdfs/v1/hdp/apps/2.3.4.0-3485/sqoop/sqoop.tar.gz?op=CREATE&user.name=hdfs&overwrite=True&permission=444' 1>/tmp/tmppXGRGK 2>/tmp/tmpfojEmU' returned 55. curl: (55) Send failure: Connection reset by peer 201
Multiple Hive MetaStore hosts try to upgrade the schema at the same time.
Traceback (most recent call last): File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive_metastore.py", line 245, in <module> HiveMetastore().execute() File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py", line 219, in execute method(env) File "/usr/lib/python2.6/site-packages/resource_management/libraries/script/script.py", line 517, in restart self.pre_upgrade_restart(env, upgrade_type=upgrade_type) File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive_metastore.py", line 109, in pre_upgrade_restart self.upgrade_schema(env) File "/var/lib/ambari-agent/cache/common-services/HIVE/0.12.0.2.0/package/scripts/hive_metastore.py", line 241, in upgrade_schema Execute(command, user=params.hive_user, tries=1, environment=env_dict, logoutput=True) File "/usr/lib/python2.6/site-packages/resource_management/core/base.py", line 154, in __init__ self.env.run() File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 158, in run self.run_action(resource, action) File "/usr/lib/python2.6/site-packages/resource_management/core/environment.py", line 121, in run_action provider_action() File "/usr/lib/python2.6/site-packages/resource_management/core/providers/system.py", line 238, in action_run tries=self.resource.tries, try_sleep=self.resource.try_sleep) File "/usr/lib/python2.6/site-packages/resource_management/core/shell.py", line 70, in inner result = function(command, **kwargs) File "/usr/lib/python2.6/site-packages/resource_management/core/shell.py", line 92, in checked_call tries=tries, try_sleep=try_sleep) File "/usr/lib/python2.6/site-packages/resource_management/core/shell.py", line 140, in _call_wrapper result = _call(command, **kwargs_copy) File "/usr/lib/python2.6/site-packages/resource_management/core/shell.py", line 291, in _call raise Fail(err_msg) resource_management.core.exceptions.Fail: Execution of 
'/usr/hdp/2.3.4.0-3485/hive/bin/schematool -dbType postgres -upgradeSchema' returned 1. WARNING: Use "yarn jar" to launch YARN applications. Metastore connection URL: jdbc:postgresql://172.22.76.197:5432/hivedb Metastore Connection Driver : org.postgresql.Driver Metastore connection User: hiveuser Starting upgrade metastore schema from version 0.14.0 to 1.2.0 Upgrade script upgrade-0.14.0-to-1.1.0.postgres.sql Error: ERROR: relation "NOTIFICATION_LOG" already exists (state=42P07,code=0) org.apache.hadoop.hive.metastore.HiveMetaException: Upgrade FAILED! Metastore state would be inconsistent !! *** schemaTool failed ***
The fix is to use the parallel-scheduler to specify a batch size of 1 at the task level, so that these tasks run on one host at a time.
Attachments
Attachments
Issue Links
- links to