diff --git build-common.xml build-common.xml
index bb37abf..43d8e9c 100644
--- build-common.xml
+++ build-common.xml
@@ -59,7 +59,7 @@
-
+
diff --git cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java
index dfb30e2..d11b873 100644
--- cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java
+++ cli/src/java/org/apache/hadoop/hive/cli/CliSessionState.java
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.cli;
 
+import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.Properties;
@@ -77,11 +78,6 @@
 
   private Hive hive; // currently only used (and init'ed) in getCurrentDbName
 
-  public CliSessionState() {
-    super();
-    remoteMode = false;
-  }
-
   public CliSessionState(HiveConf conf) {
     super(conf);
     remoteMode = false;
@@ -112,10 +108,13 @@ public int getPort() {
 
   public void close() {
     try {
+      super.close();
       if (remoteMode) {
         client.clean();
         transport.close();
       }
+    } catch (IOException ioe) {
+      ioe.printStackTrace();
     } catch (TException e) {
       e.printStackTrace();
     }
diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
index b24e218..28d8f52 100644
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
@@ -201,7 +201,8 @@
     DYNAMICPARTITIONMAXPARTS("hive.exec.max.dynamic.partitions", 1000),
     DYNAMICPARTITIONMAXPARTSPERNODE("hive.exec.max.dynamic.partitions.pernode", 100),
     MAXCREATEDFILES("hive.exec.max.created.files", 100000L),
-    DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir", "/tmp/"+System.getProperty("user.name")+"/hive_resources"),
+    DOWNLOADED_RESOURCES_DIR("hive.downloaded.resources.dir",
+        "/tmp/${hive.session.id}_resources"),
     DEFAULTPARTITIONNAME("hive.exec.default.partition.name", "__HIVE_DEFAULT_PARTITION__"),
     DEFAULT_ZOOKEEPER_PARTITION_NAME("hive.lockmgr.zookeeper.default.partition.name", "__HIVE_DEFAULT_ZOOKEEPER_PARTITION__"),
 
     // Whether to show a link to the most failed task + debugging tips
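
Note: the HiveConf change above replaces a fixed per-user path with one keyed on ${hive.session.id}. A standalone sketch, not part of the patch and with a made-up session id, of the mechanism the new default relies on: Hadoop's Configuration performs ${var} expansion when a value is read, so once hive.session.id is set the default resolves to a session-private directory.

    import org.apache.hadoop.conf.Configuration;

    public class ResourceDirExpansion {
      public static void main(String[] args) {
        Configuration conf = new Configuration(false);
        conf.set("hive.session.id", "bob_1234@host_201301010000"); // hypothetical id
        conf.set("hive.downloaded.resources.dir", "/tmp/${hive.session.id}_resources");
        // get() substitutes ${hive.session.id} from the other property
        System.out.println(conf.get("hive.downloaded.resources.dir"));
        // prints /tmp/bob_1234@host_201301010000_resources
      }
    }
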
diff --git ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
index 3d43451..8e6e24a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
+++ ql/src/java/org/apache/hadoop/hive/ql/session/SessionState.java
@@ -22,16 +22,18 @@
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.PrintStream;
+import java.lang.management.ManagementFactory;
 import java.net.URI;
+import java.text.SimpleDateFormat;
 import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.GregorianCalendar;
+import java.util.Date;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.io.FileUtils;
 import org.apache.commons.lang.StringUtils;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
@@ -57,6 +59,7 @@
  * configuration information
  */
 public class SessionState {
+  private static final Log LOG = LogFactory.getLog(SessionState.class);
 
   /**
    * current configuration.
@@ -179,18 +182,21 @@ public void setIsVerbose(boolean isVerbose) {
     this.isVerbose = isVerbose;
   }
 
-  public SessionState() {
-    this(null);
-  }
-
   public SessionState(HiveConf conf) {
     this.conf = conf;
     isSilent = conf.getBoolVar(HiveConf.ConfVars.HIVESESSIONSILENT);
     ls = new LineageState();
     overriddenConfigurations = new HashMap<String, String>();
     overriddenConfigurations.putAll(HiveConf.getConfSystemProperties());
+    // if there isn't already a session name, go ahead and create it.
+    if (StringUtils.isEmpty(conf.getVar(HiveConf.ConfVars.HIVESESSIONID))) {
+      conf.setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
+    }
   }
 
+  private static final SimpleDateFormat DATE_FORMAT =
+      new SimpleDateFormat("yyyyMMddHHmm");
+
   public void setCmd(String cmdString) {
     conf.setVar(HiveConf.ConfVars.HIVEQUERYSTRING, cmdString);
   }
@@ -242,12 +248,6 @@ public static SessionState start(SessionState startSs) {
     tss.set(startSs);
 
-    if (StringUtils.isEmpty(startSs.getConf().getVar(
-        HiveConf.ConfVars.HIVESESSIONID))) {
-      startSs.getConf()
-          .setVar(HiveConf.ConfVars.HIVESESSIONID, makeSessionId());
-    }
-
     if (startSs.hiveHist == null) {
       startSs.hiveHist = new HiveHistory(startSs);
     }
@@ -297,15 +297,15 @@ public HiveHistory getHiveHistory() {
     return hiveHist;
   }
 
+  /**
+   * Create a session ID. Looks like:
+   *   $user_$pid@$host_$date
+   * @return the unique string
+   */
   private static String makeSessionId() {
-    GregorianCalendar gc = new GregorianCalendar();
     String userid = System.getProperty("user.name");
-
-    return userid
-        + "_"
-        + String.format("%1$4d%2$02d%3$02d%4$02d%5$02d", gc.get(Calendar.YEAR),
-            gc.get(Calendar.MONTH) + 1, gc.get(Calendar.DAY_OF_MONTH), gc
-                .get(Calendar.HOUR_OF_DAY), gc.get(Calendar.MINUTE));
+    return userid + "_" + ManagementFactory.getRuntimeMXBean().getName() + "_"
+        + DATE_FORMAT.format(new Date());
   }
@@ -588,35 +588,15 @@ public static boolean canDownloadResource(String value) {
   private String downloadResource(String value, boolean convertToUnix) {
     if (canDownloadResource(value)) {
       getConsole().printInfo("converting to local " + value);
-      String location = getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR);
-
+      File resourceDir = new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
       String destinationName = new Path(value).getName();
-      String prefix = destinationName;
-      String postfix = null;
-      int index = destinationName.lastIndexOf(".");
-      if (index > 0) {
-        prefix = destinationName.substring(0, index);
-        postfix = destinationName.substring(index);
-      }
-      if (prefix.length() < 3) {
-        prefix += ".tmp"; // prefix should be longer than 3
-      }
-
-      File resourceDir = new File(location);
+      File destinationFile = new File(resourceDir, destinationName);
       if (resourceDir.exists() && !resourceDir.isDirectory()) {
         throw new RuntimeException("The resource directory is not a directory, " +
             "resourceDir is set to " + resourceDir);
       }
       if (!resourceDir.exists() && !resourceDir.mkdirs()) {
         throw new RuntimeException("Couldn't create directory " + resourceDir);
       }
-
-      File destinationFile;
-      try {
-        destinationFile = File.createTempFile(prefix, postfix, resourceDir);
-      } catch (Exception e) {
-        throw new RuntimeException("Failed to create temporary file for " + value, e);
-      }
       try {
         FileSystem fs = FileSystem.get(new URI(value), conf);
         fs.copyToLocalFile(new Path(value), new Path(destinationFile.getCanonicalPath()));
@@ -756,4 +736,17 @@ public void addLocalMapRedErrors(String id, List<String> localMapRedErrors) {
   public void setLocalMapRedErrors(Map<String, List<String>> localMapRedErrors) {
     this.localMapRedErrors = localMapRedErrors;
   }
+
+  public void close() throws IOException {
+    File resourceDir =
+        new File(getConf().getVar(HiveConf.ConfVars.DOWNLOADED_RESOURCES_DIR));
+    LOG.debug("Removing resource dir " + resourceDir);
+    try {
+      if (resourceDir.exists()) {
+        FileUtils.deleteDirectory(resourceDir);
+      }
+    } catch (IOException e) {
+      LOG.info("Error removing session resource dir " + resourceDir, e);
+    }
+  }
 }
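
Note: the new makeSessionId() produces ids of the form $user_$pid@$host_$yyyyMMddHHmm. A minimal standalone sketch, assuming a HotSpot-style JVM where RuntimeMXBean.getName() conventionally returns "pid@hostname" (that format is not guaranteed by the Java spec):

    import java.lang.management.ManagementFactory;
    import java.text.SimpleDateFormat;
    import java.util.Date;

    public class SessionIdSketch {
      public static void main(String[] args) {
        String id = System.getProperty("user.name") + "_"
            + ManagementFactory.getRuntimeMXBean().getName() + "_"
            + new SimpleDateFormat("yyyyMMddHHmm").format(new Date());
        System.out.println(id); // e.g. bob_12345@worker1_201301010000
      }
    }

The pid@host component keeps ids distinct when the same user opens sessions from different JVMs within the same minute; sessions inside a single HiveServer2 JVM instead get an explicit UUID-based id, as HiveSessionImpl below shows.
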
diff --git ql/src/test/queries/clientpositive/remote_script.q ql/src/test/queries/clientpositive/remote_script.q
new file mode 100644
index 0000000..926601c
--- /dev/null
+++ ql/src/test/queries/clientpositive/remote_script.q
@@ -0,0 +1,13 @@
+dfs -put ../data/scripts/newline.py /newline.py;
+add file hdfs:///newline.py;
+set hive.transform.escape.input=true;
+
+create table tmp_tmp(key string, value string) stored as rcfile;
+insert overwrite table tmp_tmp
+SELECT TRANSFORM(key, value) USING
+'python newline.py' AS key, value FROM src limit 6;
+
+select * from tmp_tmp ORDER BY key ASC, value ASC;
+
+dfs -rmr /newline.py;
+drop table tmp_tmp;
diff --git ql/src/test/results/clientpositive/remote_script.q.out ql/src/test/results/clientpositive/remote_script.q.out
new file mode 100644
index 0000000..8806b2b
--- /dev/null
+++ ql/src/test/results/clientpositive/remote_script.q.out
@@ -0,0 +1,50 @@
+PREHOOK: query: create table tmp_tmp(key string, value string) stored as rcfile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table tmp_tmp(key string, value string) stored as rcfile
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@tmp_tmp
+PREHOOK: query: insert overwrite table tmp_tmp
+SELECT TRANSFORM(key, value) USING
+'python newline.py' AS key, value FROM src limit 6
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@tmp_tmp
+POSTHOOK: query: insert overwrite table tmp_tmp
+SELECT TRANSFORM(key, value) USING
+'python newline.py' AS key, value FROM src limit 6
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@tmp_tmp
+POSTHOOK: Lineage: tmp_tmp.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tmp_tmp.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select * from tmp_tmp ORDER BY key ASC, value ASC
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tmp_tmp
+#### A masked pattern was here ####
+POSTHOOK: query: select * from tmp_tmp ORDER BY key ASC, value ASC
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tmp_tmp
+#### A masked pattern was here ####
+POSTHOOK: Lineage: tmp_tmp.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tmp_tmp.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+1	2	NULL
+1	2	NULL
+1	NULL
+2	NULL
+1	NULL
+2	NULL
+1	NULL
+2	NULL
+1	NULL
+2	NULL
+#### A masked pattern was here ####
+PREHOOK: query: drop table tmp_tmp
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tmp_tmp
+PREHOOK: Output: default@tmp_tmp
+POSTHOOK: query: drop table tmp_tmp
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tmp_tmp
+POSTHOOK: Output: default@tmp_tmp
+POSTHOOK: Lineage: tmp_tmp.key SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
+POSTHOOK: Lineage: tmp_tmp.value SCRIPT [(src)src.FieldSchema(name:key, type:string, comment:default), (src)src.FieldSchema(name:value, type:string, comment:default), ]
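
Note: the new test drives the downloadResource() path rewritten above: `add file hdfs:///newline.py` forces Hive to copy the script from HDFS into the session's resource directory before the TRANSFORM runs. A rough standalone sketch of that copy step, with hypothetical local paths and error handling trimmed:

    import java.io.File;
    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class DownloadSketch {
      public static void main(String[] args) throws Exception {
        String value = "hdfs:///newline.py";                // the added resource
        File resourceDir = new File("/tmp/demo_resources"); // stand-in for the session dir
        if (!resourceDir.exists() && !resourceDir.mkdirs()) {
          throw new RuntimeException("Couldn't create directory " + resourceDir);
        }
        // the local name is just the remote file name inside the session dir
        File destinationFile = new File(resourceDir, new Path(value).getName());
        FileSystem fs = FileSystem.get(new URI(value), new Configuration());
        fs.copyToLocalFile(new Path(value), new Path(destinationFile.getCanonicalPath()));
      }
    }
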
diff --git service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
index 3a6231c..7254491 100644
--- service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
+++ service/src/java/org/apache/hive/service/cli/session/HiveSessionImpl.java
@@ -18,13 +18,19 @@
 
 package org.apache.hive.service.cli.session;
 
+import java.io.File;
+import java.io.IOException;
 import java.util.HashMap;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Map;
 import java.util.Set;
 
+import org.apache.commons.io.FileUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.conf.HiveConf.ConfVars;
 import org.apache.hadoop.hive.metastore.HiveMetaStoreClient;
 import org.apache.hadoop.hive.metastore.IMetaStoreClient;
 import org.apache.hadoop.hive.metastore.api.MetaException;
@@ -64,6 +70,8 @@
   private static final String FETCH_WORK_SERDE_CLASS =
       "org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe";
 
+  private static final Log LOG = LogFactory.getLog(HiveSessionImpl.class);
+
   private SessionManager sessionManager;
   private OperationManager operationManager;
 
@@ -79,7 +87,9 @@ public HiveSessionImpl(String username, String password, Map<String, String> ses
         hiveConf.set(entry.getKey(), entry.getValue());
       }
     }
-
+    // set an explicit session name to control the download directory name
+    hiveConf.set(ConfVars.HIVESESSIONID.varname,
+        sessionHandle.getHandleIdentifier().toString());
     sessionState = new SessionState(hiveConf);
   }
 
@@ -300,8 +310,11 @@
       if (null != hiveHist) {
         hiveHist.closeStream();
       }
-    } finally {
+      sessionState.close();
+      release();
+    } catch (IOException ioe) {
       release();
+      throw new HiveSQLException("Failure to close", ioe);
     }
   }
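
Note: taken together, the patch keys each HiveServer2 session's download directory to the session handle's UUID and deletes it on close. A condensed lifecycle sketch with hypothetical names; the real code spreads this across HiveSessionImpl and SessionState:

    import java.io.File;
    import java.util.UUID;
    import org.apache.commons.io.FileUtils;

    public class SessionLifecycleSketch {
      public static void main(String[] args) throws Exception {
        // open: HIVESESSIONID is pinned to the handle's UUID
        String sessionId = UUID.randomUUID().toString();
        File resourceDir = new File("/tmp/" + sessionId + "_resources");
        // first "add file": the directory is created on demand
        if (!resourceDir.exists() && !resourceDir.mkdirs()) {
          throw new RuntimeException("Couldn't create directory " + resourceDir);
        }
        // ... session runs; downloaded resources land in resourceDir ...
        // close: the whole per-session directory is removed recursively
        FileUtils.deleteDirectory(resourceDir);
      }
    }

Compared with the old shared /tmp/<user>/hive_resources, the UUID-named directory means concurrent sessions of one user no longer clobber each other's downloaded files, and cleanup on close cannot remove resources that another live session still needs.
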