# diff --git a/hcatalog/bin/hcat.py b/hcatalog/bin/hcat.py new file mode 100644 index 0000000..104561a --- /dev/null +++ b/hcatalog/bin/hcat.py @@ -0,0 +1,154 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Launcher for the HCatalog command line interface: locates Hadoop and Hive,
# assembles HADOOP_CLASSPATH / HADOOP_OPTS and runs
# org.apache.hcatalog.cli.HCatCli through "hadoop jar".

import os
import sys
import glob
import subprocess


def popFlag(argv, flag):
    """Remove the first occurrence of ``flag`` from ``argv`` in place.

    Returns True when the flag was present (and has been removed), False
    otherwise.  Only the first occurrence is removed.
    """
    try:
        argv.remove(flag)
    except ValueError:
        return False
    return True


def joinClasspath(entries):
    """Join the non-empty ``entries`` with ``os.pathsep``.

    Empty strings are skipped so that an unset/empty HADOOP_CLASSPATH or
    HBASE_CONF_DIR does not leave a stray separator (an empty classpath
    entry) in the result.
    """
    return os.pathsep.join([entry for entry in entries if entry])


def main():
    """Build the environment for the HCatalog CLI and run it.

    Exits the interpreter with the return code of the hadoop command, or
    with an error message when Hadoop, Hive or the HCatalog jar cannot be
    found.  With ``-secretDebugCmd`` the command is printed instead of run;
    with ``-classpath`` only the computed HADOOP_CLASSPATH is printed.
    """
    # Determine our absolute path, resolving any symbolic links
    this = os.path.realpath(sys.argv[0])
    bindir = os.path.dirname(this) + os.path.sep

    # Add the libexec directory to our search path so we can find the
    # hcatcfg module
    sys.path.append(os.path.join(bindir, os.path.pardir, "libexec"))
    import hcatcfg

    # Find our config directory and Hadoop
    hcatcfg.findCfgFile()
    hcatcfg.findHadoop()

    # See if any debug flags have been turned on; they are stripped from
    # sys.argv so they are not forwarded to HCatCli
    debug = popFlag(sys.argv, '-secretDebugCmd')
    dumpClasspath = popFlag(sys.argv, '-classpath')

    # find HIVE installation directory
    hcatcfg.findHive()
    if 'HIVE_HOME' not in os.environ:
        sys.exit("Hive not found. Set HIVE_HOME to directory containing Hive.")
    hiveHome = os.environ['HIVE_HOME']

    if 'HIVE_LIB_DIR' not in os.environ:
        sys.exit("Cannot find lib dir within HIVE_HOME %s" % (hiveHome + os.path.sep + "lib"))

    if 'HIVE_CONF_DIR' not in os.environ:
        sys.exit("Cannot find conf dir within HIVE_HOME %s" % (hiveHome + os.path.sep + "conf"))

    # find the hcatalog jar; exactly one must be present
    hcatPrefix = hcatcfg.findHCatPrefix(bindir)
    hcatJarPattern = os.path.join(hcatPrefix, 'share', 'hcatalog', 'hcatalog-core-*.jar')
    hcatJars = glob.glob(hcatJarPattern)

    if len(hcatJars) > 1:
        sys.exit("Found more than one hcatalog jar in the prefix path")

    if len(hcatJars) < 1:
        sys.exit("HCatalog jar not found in directory %s" % (hcatJarPattern))

    # Classpath order: pre-existing HADOOP_CLASSPATH, the hcatalog jar, the
    # other hcatalog lib jars, the hive jars, the hive conf dir and, when
    # set, the hbase conf dir.
    # FIXME there is no check that HBASE_CONF_DIR exists - the original shell
    # script does not do much if the path does not exist either
    os.environ['HADOOP_CLASSPATH'] = joinClasspath([
        os.environ.get('HADOOP_CLASSPATH', ''),
        hcatJars[0],
        os.path.join(hcatPrefix, 'share', 'hcatalog', 'lib', '*'),
        os.path.join(os.environ['HIVE_LIB_DIR'], '*'),
        os.environ['HIVE_CONF_DIR'],
        os.environ.get('HBASE_CONF_DIR', ''),
    ])

    # log under the Hive log dir but use a separate log file for HCat logs
    os.environ['HADOOP_OPTS'] = " ".join([
        os.environ.get('HADOOP_OPTS', ''),
        "-Dhive.log.file=hcat.log",
        "-Dhive.log.dir=" + os.path.join(hiveHome, "logs"),
    ])

    ##### Uncomment to debug log4j configuration
    #os.environ['HADOOP_OPTS'] += " -Dlog4j.debug"

    onPosix = (os.name == "posix")
    if onPosix:
        hadoopcmd = "hadoop"
    else:
        hadoopcmd = "hadoop.cmd"

    cmdLine = os.path.join(os.environ['HADOOP_PREFIX'], "bin", hadoopcmd)
    cmd = [cmdLine, "jar", hcatJars[0], "org.apache.hcatalog.cli.HCatCli"] + sys.argv[1:]
    if not onPosix:
        # the .cmd wrapper is run through the shell builtin "call"
        cmd = ["call"] + cmd

    if debug:
        print("Would run:")
        print("exec " + str(cmd))
        print(" with HADOOP_CLASSPATH set to %s" % (os.environ['HADOOP_CLASSPATH']))
        print(" and HADOOP_OPTS set to %s" % (os.environ['HADOOP_OPTS']))
        return

    if dumpClasspath:
        print(os.environ['HADOOP_CLASSPATH'])
        return

    # make sure nothing of ours is still buffered before the child writes
    sys.stdout.flush()
    if onPosix:
        retval = subprocess.call(cmd)
    else:
        retval = subprocess.call(cmd, stdin=None, stdout=None, stderr=None, shell=True)
    os.environ['errorlevel'] = str(retval)
    sys.exit(retval)


if __name__ == "__main__":
    main()

# diff --git a/hcatalog/bin/hcat_server.py b/hcatalog/bin/hcat_server.py new file mode 100644 index 0000000..91a19ef --- /dev/null +++ b/hcatalog/bin/hcat_server.py @@ -0,0 +1,165 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.
# See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Start/stop script for the HCatalog server (the Hive metastore service).

import sys
import os
import subprocess
import time
import glob

from time import strftime

# seconds to wait after launching the metastore before checking on it
sleepTime = 3


def print_usage():
    """Print the command line usage of this script."""
    print("Usage: %s [--config confdir] COMMAND" % (sys.argv[0]))
    print("  start Start HCatalog Server")
    print("  stop Stop HCatalog Server")


def readPid(pidFile):
    """Return the process id recorded in ``pidFile``.

    Returns None when the file cannot be opened, is empty, does not start
    with an integer, or holds a non-positive value.  Non-positive pids are
    rejected because passing them to os.kill() addresses process groups
    rather than a single process.
    """
    try:
        pidFileDesc = open(pidFile, 'r')
    except (IOError, OSError):
        return None
    try:
        words = pidFileDesc.read().split()
    finally:
        pidFileDesc.close()

    if not words:
        return None
    try:
        pid = int(words[0])
    except ValueError:
        return None
    if pid <= 0:
        return None
    return pid


def isRunning(pid):
    """Return True when a process with id ``pid`` can be signalled.

    Only probes on POSIX, using signal 0.
    NOTE: per the Python os.kill documentation, on Windows any signal other
    than the console control events terminates the target process, so no
    probe is attempted there and False is returned.
    """
    if os.name != "posix":
        return False
    try:
        os.kill(pid, 0)
    except OSError:
        return False
    return True


def start_hcat():
    """Launch the Hive metastore service in the background.

    Reads HCAT_LOG_DIR, HIVE_HOME and HCAT_PREFIX (required) and DBROOT,
    AUX_CLASSPATH and HADOOP_OPTS (optional) from the environment; sets
    HIVE_SITE_XML, AUX_CLASSPATH, HADOOP_OPTS and HADOOP_HEAPSIZE; writes
    the server pid to HCAT_LOG_DIR/hcat.pid.

    Exits the interpreter with a message when the server already appears to
    be running, when Hive or hive-site.xml cannot be found, or when the
    metastore fails to start.
    """
    logDir = os.environ['HCAT_LOG_DIR']
    pidFile = os.path.join(logDir, 'hcat.pid')

    # check if service is already running, if so exit.  The exit is raised
    # outside of any try block so that it cannot be swallowed.
    oldPid = readPid(pidFile)
    if oldPid is not None and isRunning(oldPid):
        sys.exit("HCatalog server appears to be running. If you are sure it is not remove %s and re-run this script" % (pidFile))

    if 'HIVE_HOME' not in os.environ:
        sys.exit("Hive not found. Set HIVE_HOME to directory containing Hive.")
    hiveHome = os.environ['HIVE_HOME']

    hiveSiteXml = os.path.join(hiveHome, 'conf', 'hive-site.xml')
    os.environ['HIVE_SITE_XML'] = hiveSiteXml
    if not os.path.exists(hiveSiteXml):
        sys.exit("Missing hive-site.xml, expected at %s" % (hiveSiteXml))

    # Find our Warehouse dir from the config file
    # WAREHOUSE_DIR=`sed -n '/hive.metastore.warehouse.dir<\/name>/ {
    #   n
    #   s/.*\(.*\)<\/value>.*/\1/p
    #   }' $HIVE_SITE_XML`
    # HADOOP_OPTS="$HADOOP_OPTS -Dhive.metastore.warehouse.dir=$WAREHOUSE_DIR "

    # add in hive-site.xml to classpath
    auxEntries = [os.environ.get('AUX_CLASSPATH', ''), os.path.dirname(hiveSiteXml)]

    # add jars from db connectivity dir - be careful to not point to
    # something like /lib.  An unset or empty DBROOT contributes nothing.
    dbRoot = os.environ.get('DBROOT', '')
    if dbRoot:
        auxEntries.extend(glob.glob(os.path.join(dbRoot, '*.jar')))

    hcatShare = os.path.join(os.environ['HCAT_PREFIX'], 'share', 'hcatalog')
    auxEntries.extend(glob.glob(os.path.join(hcatShare, 'lib', '*.jar')))
    auxEntries.extend(glob.glob(os.path.join(hcatShare, '*.jar')))

    # skip empty entries so the classpath never starts with a separator
    os.environ['AUX_CLASSPATH'] = os.pathsep.join([entry for entry in auxEntries if entry])

    os.environ['HADOOP_OPTS'] = (
        os.environ.get('HADOOP_OPTS', '')
        + " -server -XX:+UseConcMarkSweepGC -XX:ErrorFile="
        + os.path.join(logDir, 'hcat_err_pid%p.log')
        + " -Xloggc:" + os.path.join(logDir, 'hcat_gc.log-') + strftime("%Y%m%d%H%M")
        + " -verbose:gc -XX:+PrintGCDetails -XX:+PrintGCTimeStamps -XX:+PrintGCDateStamps")
    os.environ['HADOOP_HEAPSIZE'] = '2048'  # 8G is better if you have it

    if os.name == "posix":
        hivecmd = "hive"
    else:
        hivecmd = "hive.cmd"

    command = os.path.join(hiveHome, "bin", hivecmd)
    outFile = os.path.join(logDir, "hcat.out")
    errFile = os.path.join(logDir, "hcat.err")
    windowsTmpFile = os.path.join(logDir, "windows.tmp")

    # back ground the metastore service.  The child gets its own copies of
    # the log file descriptors, so ours are closed right after the launch.
    try:
        with open(outFile, 'w') as outfd:
            with open(errFile, 'w') as errfd:
                child = subprocess.Popen([command, "--service", "metastore"],
                                         stdout=outfd, stderr=errfd)
    except (IOError, OSError) as inst:
        print(inst)
        sys.exit("Metastore startup failed, see %s" % (errFile))

    print("Started metastore server init, testing if initialized correctly...")
    time.sleep(sleepTime)
    try:
        if os.name == "posix":
            # poll() reaps the child; probing our own unreaped child with
            # signal 0 would not notice that it has already exited
            if child.poll() is not None:
                raise Exception("metastore process exited with status %s" % (child.returncode))
            pid = child.pid
        else:
            # hive.cmd spawns the JVM; look its pid up through jps
            ret = os.system("jps | find /I \"HiveMetaStore\" > \"" + windowsTmpFile + "\"")
            if ret != 0:
                raise Exception("error starting process")
            with open(windowsTmpFile, 'r') as windowsTmpFd:
                pid = int(windowsTmpFd.readline().split(" ")[0])

        # record the pid
        with open(pidFile, 'w') as pidFileDesc:
            pidFileDesc.write(str(pid))
        print("Metastore initialized successfully")
    except Exception as inst:
        print(inst)
        sys.exit("Metastore startup failed, see %s" % (errFile))

    return


def stop_hcat():
    """Stop the server whose pid is recorded in HCAT_LOG_DIR/hcat.pid.

    Sends signal 6 first and falls back to signal 9 when that fails.  Exits
    the interpreter with a message when no valid pid is recorded or when
    both signals fail.  The pid file itself is left in place.
    """
    pidFile = os.path.join(os.environ['HCAT_LOG_DIR'], 'hcat.pid')

    # Never signal without a valid, positive pid: os.kill(0, ...) would hit
    # every process in our own process group.
    pid = readPid(pidFile)
    if pid is None:
        sys.exit("Failed to stop metastore server: no valid pid found in %s" % (pidFile))

    try:
        os.kill(pid, 6)
    except OSError:
        try:
            os.kill(pid, 9)
        except OSError:
            sys.exit("Failed to stop metastore server")

    return


if __name__ == "__main__":

    this = os.path.realpath(sys.argv[0])
    bindir = os.path.dirname(this) + os.path.sep

    import hcatcfg
    hcatLogDir = hcatcfg.getHCatLogDir(bindir)
    hcatcfg.findHCatPrefix(bindir)
    os.environ['HCAT_LOG_DIR'] = hcatLogDir

    if len(sys.argv) == 1:
        print_usage()
        sys.exit()

    # NOTE(review): the usage text advertises "--config confdir" but no code
    # here interprets it - confirm whether it should be supported.
    if sys.argv[1] == 'start':
        start_hcat()
    elif sys.argv[1] == 'stop':
        stop_hcat()
    else:
        # an unrecognised command must not stop a running server
        print_usage()
        sys.exit(1)

# diff --git a/hcatalog/bin/hcatcfg.py b/hcatalog/bin/hcatcfg.py new file mode 100644 index 0000000..ba23837 --- /dev/null +++ b/hcatalog/bin/hcatcfg.py @@ -0,0 +1,86 @@
# Licensed to the Apache Software Foundation (ASF) under
# one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Helpers shared by the HCatalog scripts for locating the HCatalog, Hadoop
# and Hive installations.  Results are published through os.environ.

import os
import os.path
import sys


def findCfgFile():
    """Set HCAT_CONF_DIR unless it is already set.

    Uses $HCAT_PREFIX/etc/hcatalog when HCAT_PREFIX is set and that
    directory exists, and /etc/hcatalog otherwise.
    """
    defaultConfDir = buildAbsPath(["etc", "hcatalog"])
    if 'HCAT_PREFIX' in os.environ:
        prefixConfDir = os.path.join(os.environ['HCAT_PREFIX'], "etc", "hcatalog")
        if os.path.exists(prefixConfDir):
            defaultConfDir = prefixConfDir

    if 'HCAT_CONF_DIR' not in os.environ:
        os.environ['HCAT_CONF_DIR'] = defaultConfDir


def _hasHadoop(envVar):
    """Return True when $envVar is non-empty and contains bin/hadoop."""
    root = os.environ.get(envVar, '')
    # an empty value must not turn into a probe of /bin/hadoop
    if not root:
        return False
    return os.path.exists(os.path.join(root, "bin", "hadoop"))


def findHadoop():
    """Locate Hadoop and record its root in HADOOP_PREFIX.

    HADOOP_HOME is tried first, then HCAT_PREFIX; an already valid
    HADOOP_PREFIX is kept as the last resort.  Exits the interpreter with a
    message when none of them contains bin/hadoop.
    """
    if _hasHadoop('HADOOP_HOME'):
        os.environ['HADOOP_PREFIX'] = os.environ['HADOOP_HOME']
    elif _hasHadoop('HCAT_PREFIX'):
        os.environ['HADOOP_PREFIX'] = os.environ['HCAT_PREFIX']
    elif not _hasHadoop('HADOOP_PREFIX'):
        sys.exit("Hadoop not found. Set HADOOP_HOME to the directory containing Hadoop.")


def concatPath(x, y):
    """Return ``x`` and ``y`` joined by the platform path separator."""
    return x + os.path.sep + y


def buildPath(pathElements):
    """Return the strings in ``pathElements`` joined by the path separator.

    The result is relative (no leading separator).
    """
    # str.join replaces the Python 2 only builtin reduce(concatPath, ...)
    return os.path.sep.join(pathElements)


def buildAbsPath(pathElements):
    """Return ``pathElements`` joined into a path with a leading separator."""
    return os.path.sep + buildPath(pathElements)


def findHive():
    """Locate Hive and publish HIVE_HOME, HIVE_LIB_DIR and HIVE_CONF_DIR.

    A HIVE_HOME already present in the environment overrides the default
    location.  Each variable is only set when the corresponding path
    exists; the caller decides how to handle a variable that is missing.
    """
    # TODO, check for Hive in path.  For now, just look in known locations
    # and HIVE_HOME
    # No need to be OS independent checking for /usr/bin/hive since this is
    # an RPM specific path
    if 'HIVE_HOME' not in os.environ:
        if not os.path.exists("/usr/bin/hive"):
            # the api user determines how to handle the non-existence of
            # HIVE_HOME
            return
        os.environ['HIVE_HOME'] = buildAbsPath(["usr", "lib", "hive"])

    hiveHome = os.environ['HIVE_HOME']

    # lib and conf are looked up independently, so a missing lib directory
    # does not hide an existing conf directory
    libDir = os.path.join(hiveHome, 'lib')
    if os.path.exists(libDir):
        os.environ['HIVE_LIB_DIR'] = libDir

    confDir = os.path.join(hiveHome, 'conf')
    if os.path.exists(confDir):
        os.environ['HIVE_CONF_DIR'] = confDir

    return


def findHCatPrefix(binDir):
    """Set HCAT_PREFIX to the parent of ``binDir`` and return it.

    The value is ``binDir`` followed by ``..`` and a trailing separator;
    ``binDir`` may be given with or without its own trailing separator.
    """
    os.environ['HCAT_PREFIX'] = os.path.join(binDir, '..') + os.path.sep
    return os.environ['HCAT_PREFIX']


def getHCatLogDir(binDir):
    """Return the log directory, ``var/log`` under the parent of ``binDir``."""
    return os.path.join(binDir, '..', 'var', 'log')