Index: shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java =================================================================== --- shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (revision 1379535) +++ shims/src/0.20/java/org/apache/hadoop/hive/shims/Hadoop20Shims.java (working copy) @@ -22,6 +22,8 @@ import java.io.IOException; import java.lang.reflect.Constructor; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.net.URL; import java.security.PrivilegedActionException; import java.security.PrivilegedExceptionAction; @@ -441,26 +443,34 @@ HadoopArchives har = new HadoopArchives(conf); List args = new ArrayList(); - if (conf.get("hive.archive.har.parentdir.settable") == null) { - throw new RuntimeException("hive.archive.har.parentdir.settable is not set"); - } - boolean parentSettable = - conf.getBoolean("hive.archive.har.parentdir.settable", false); + args.add("-archiveName"); + args.add(archiveName); + args.add(sourceDir.toString()); + args.add(destDir.toString()); - if (parentSettable) { - args.add("-archiveName"); - args.add(archiveName); - args.add("-p"); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - } else { - args.add("-archiveName"); - args.add(archiveName); - args.add(sourceDir.toString()); - args.add(destDir.toString()); + return ToolRunner.run(har, args.toArray(new String[0])); + } + + /* + *(non-Javadoc) + * @see org.apache.hadoop.hive.shims.HadoopShims#getHarUri(java.net.URI, java.net.URI, java.net.URI) + * This particular instance is for Hadoop 20 which creates an archive + * with the entire directory path from which one created the archive as + * compared against the one used by Hadoop 1.0 (within HadoopShimsSecure) + * where a relative path is stored within the archive. 
+ */ + public URI getHarUri (URI original, URI base, URI originalBase) + throws URISyntaxException { + URI relative = null; + + String dirInArchive = original.getPath(); + if (dirInArchive.length() > 1 && dirInArchive.charAt(0) == '/') { + dirInArchive = dirInArchive.substring(1); } - return ToolRunner.run(har, args.toArray(new String[0])); + relative = new URI(null, null, dirInArchive, null); + + return base.resolve(relative); } public static class NullOutputCommitter extends OutputCommitter { Index: shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java =================================================================== --- shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java (revision 1379535) +++ shims/src/common/java/org/apache/hadoop/hive/shims/HadoopShims.java (working copy) @@ -21,6 +21,8 @@ import java.io.DataOutput; import java.io.IOException; import java.net.MalformedURLException; +import java.net.URI; +import java.net.URISyntaxException; import java.security.PrivilegedExceptionAction; import java.util.List; @@ -159,6 +161,9 @@ int createHadoopArchive(Configuration conf, Path parentDir, Path destDir, String archiveName) throws Exception; + + public URI getHarUri(URI original, URI base, URI originalBase) + throws URISyntaxException; /** * Hive uses side effect files exclusively for it's output. It also manages * the setup/cleanup/commit of output from the hive client. 
As a result it does Index: shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java =================================================================== --- shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java (revision 1379535) +++ shims/src/common-secure/java/org/apache/hadoop/hive/shims/HadoopShimsSecure.java (working copy) @@ -21,6 +21,8 @@ import java.io.DataOutput; import java.io.IOException; import java.lang.reflect.Constructor; +import java.net.URI; +import java.net.URISyntaxException; import java.security.PrivilegedExceptionAction; import java.util.ArrayList; import java.util.List; @@ -438,26 +440,31 @@ HadoopArchives har = new HadoopArchives(conf); List args = new ArrayList(); - if (conf.get("hive.archive.har.parentdir.settable") == null) { - throw new RuntimeException("hive.archive.har.parentdir.settable is not set"); - } - boolean parentSettable = - conf.getBoolean("hive.archive.har.parentdir.settable", false); + args.add("-archiveName"); + args.add(archiveName); + args.add("-p"); + args.add(sourceDir.toString()); + args.add(destDir.toString()); - if (parentSettable) { - args.add("-archiveName"); - args.add(archiveName); - args.add("-p"); - args.add(sourceDir.toString()); - args.add(destDir.toString()); - } else { - args.add("-archiveName"); - args.add(archiveName); - args.add(sourceDir.toString()); - args.add(destDir.toString()); + return ToolRunner.run(har, args.toArray(new String[0])); + } + + /* + * This particular instance is for Hadoop 1.0 which creates an archive + * with only the relative path of the archived directory stored within + * the archive as compared to the full path in case of earlier versions. + * See this api in Hadoop20Shims for comparison. 
+ */ + public URI getHarUri(URI original, URI base, URI originalBase) + throws URISyntaxException { + URI relative = originalBase.relativize(original); + if (relative.isAbsolute()) { + throw new URISyntaxException("Couldn't create URI for location.", + "Relative: " + relative + " Base: " + + base + " OriginalBase: " + originalBase); } - return ToolRunner.run(har, args.toArray(new String[0])); + return base.resolve(relative); } public static class NullOutputCommitter extends OutputCommitter { Index: conf/hive-default.xml.template =================================================================== --- conf/hive-default.xml.template (revision 1379535) +++ conf/hive-default.xml.template (working copy) @@ -1044,14 +1044,6 @@ - hive.archive.har.parentdir.settable - false - In new Hadoop versions, the parent directory must be set while - creating a HAR. Because this functionality is hard to detect with just version - numbers, this conf var needs to be set manually. - - - hive.fetch.output.serde org.apache.hadoop.hive.serde2.DelimitedJSONSerDe The serde used by FetchTask to serialize the fetch output. 
Index: build-common.xml =================================================================== --- build-common.xml (revision 1379535) +++ build-common.xml (working copy) @@ -91,7 +91,7 @@ - + Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 1379535) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -542,7 +542,6 @@ // For har files HIVEARCHIVEENABLED("hive.archive.enabled", false), - HIVEHARPARENTDIRSETTABLE("hive.archive.har.parentdir.settable", false), //Enable/Disable gbToIdx rewrite rule HIVEOPTGBYUSINGINDEX("hive.optimize.index.groupby", false), Index: ql/src/test/results/clientpositive/archive.q.out =================================================================== --- ql/src/test/results/clientpositive/archive.q.out (revision 1379535) +++ ql/src/test/results/clientpositive/archive.q.out (working copy) @@ -1,6 +1,10 @@ -PREHOOK: query: drop table tstsrc +PREHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +drop table tstsrc PREHOOK: type: DROPTABLE -POSTHOOK: query: drop table tstsrc +POSTHOOK: query: -- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +drop table tstsrc POSTHOOK: type: DROPTABLE PREHOOK: query: drop table tstsrcpart PREHOOK: type: DROPTABLE @@ -96,17 +100,13 @@ POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] -PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +PREHOOK: query: SELECT 
SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 PREHOOK: type: QUERY PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 #### A masked pattern was here #### -POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) - -SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 POSTHOOK: type: QUERY POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 Index: ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out =================================================================== --- ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (revision 0) +++ ql/src/test/results/clientpositive/archive_excludeHadoop20.q.out (revision 0) @@ -0,0 +1,573 @@ +PREHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +drop table tstsrc +PREHOOK: type: DROPTABLE +POSTHOOK: query: -- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +drop table tstsrc +POSTHOOK: type: DROPTABLE +PREHOOK: query: drop table tstsrcpart +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table tstsrcpart +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table tstsrc like src +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table tstsrc like src +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tstsrc +PREHOOK: query: insert overwrite table tstsrc select key, value from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@tstsrc +POSTHOOK: query: insert overwrite table tstsrc select key, value from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@tstsrc +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tstsrcpart +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') +select key, value from srcpart where ds='2008-04-08' and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 +POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') +select key, value from srcpart where ds='2008-04-08' and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=11 +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') +select key, value from srcpart where ds='2008-04-08' and hr='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Output: 
default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') +select key, value from srcpart where ds='2008-04-08' and hr='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') +select key, value from srcpart where ds='2008-04-09' and hr='11' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 +POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') +select key, value from srcpart where ds='2008-04-09' and hr='11' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=11 +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') +select key, value from srcpart where ds='2008-04-09' and hr='12' +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 +POSTHOOK: query: insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') +select key, value from srcpart where ds='2008-04-09' and hr='12' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +48479881068 +PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +PREHOOK: Input: default@tstsrcpart +PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Input: default@tstsrcpart +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +48479881068 +PREHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +0 3 +PREHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key +WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrc +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key +WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0' +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrc +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE 
[(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +0 val_0 2008-04-08 12 0 val_0 +PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +PREHOOK: Input: default@tstsrcpart +PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_UNARCHIVE +POSTHOOK: Input: default@tstsrcpart +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@tstsrcpart@ds=2008-04-08/hr=12 +#### A masked pattern was here #### +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +48479881068 +PREHOOK: query: CREATE TABLE harbucket(key INT) +PARTITIONED by (ds STRING) +CLUSTERED BY (key) INTO 10 BUCKETS +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE harbucket(key INT) +PARTITIONED by (ds STRING) +CLUSTERED BY (key) INTO 10 BUCKETS +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@harbucket +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrc +PREHOOK: Output: default@harbucket@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrc +POSTHOOK: Output: default@harbucket@ds=1 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: 
Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@harbucket@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@harbucket@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +0 +0 +0 +10 +20 +30 +PREHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_ARCHIVE +PREHOOK: Input: default@tstsrcpart +PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Input: default@tstsrcpart +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@harbucket@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@harbucket@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +0 +0 +0 +10 +20 +30 +PREHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +PREHOOK: type: ALTERTABLE_UNARCHIVE +PREHOOK: Input: default@tstsrcpart +PREHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: query: ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12') +POSTHOOK: type: ALTERTABLE_UNARCHIVE +POSTHOOK: Input: default@tstsrcpart +POSTHOOK: Output: default@tstsrcpart@ds=2008-04-08/hr=12 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +PREHOOK: type: QUERY +PREHOOK: Input: default@harbucket@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@harbucket@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, 
comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +0 +0 +0 +10 +20 +30 +PREHOOK: query: CREATE TABLE old_name(key INT) +PARTITIONED by (ds STRING) +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE old_name(key INT) +PARTITIONED by (ds STRING) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@old_name +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 +PREHOOK: type: QUERY +PREHOOK: Input: default@tstsrc +PREHOOK: Output: default@old_name@ds=1 +POSTHOOK: query: INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@tstsrc +POSTHOOK: Output: default@old_name@ds=1 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') +PREHOOK: type: ALTERTABLE_ARCHIVE +PREHOOK: Input: default@old_name +PREHOOK: Output: default@old_name@ds=1 +POSTHOOK: query: ALTER TABLE old_name ARCHIVE PARTITION (ds='1') +POSTHOOK: type: ALTERTABLE_ARCHIVE +POSTHOOK: Input: default@old_name +POSTHOOK: Output: default@old_name@ds=1 +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@old_name@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@old_name@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +48656137 +PREHOOK: query: ALTER TABLE old_name RENAME TO new_name +PREHOOK: type: ALTERTABLE_RENAME +PREHOOK: Input: default@old_name +PREHOOK: Output: default@old_name +POSTHOOK: query: ALTER TABLE old_name RENAME TO new_name +POSTHOOK: type: ALTERTABLE_RENAME +POSTHOOK: Input: default@old_name +POSTHOOK: Output: default@new_name +POSTHOOK: Output: default@old_name +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, 
comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 +PREHOOK: type: QUERY +PREHOOK: Input: default@new_name@ds=1 +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@new_name@ds=1 +#### A masked pattern was here #### +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, 
type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +48656137 +PREHOOK: query: drop table tstsrc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tstsrc +PREHOOK: Output: default@tstsrc +POSTHOOK: query: drop table tstsrc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tstsrc +POSTHOOK: Output: default@tstsrc +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart 
PARTITION(ds=2008-04-09,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop table tstsrcpart +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@tstsrcpart +PREHOOK: Output: default@tstsrcpart +POSTHOOK: query: drop table tstsrcpart +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@tstsrcpart +POSTHOOK: Output: default@tstsrcpart +POSTHOOK: Lineage: harbucket PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: old_name PARTITION(ds=1).key EXPRESSION [(tstsrc)tstsrc.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrc.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-08,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).key SIMPLE 
[(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=11).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: tstsrcpart PARTITION(ds=2008-04-09,hr=12).value SIMPLE [(srcpart)srcpart.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/queries/clientpositive/archive_excludeHadoop20.q =================================================================== --- ql/src/test/queries/clientpositive/archive_excludeHadoop20.q (revision 0) +++ ql/src/test/queries/clientpositive/archive_excludeHadoop20.q (revision 0) @@ -0,0 +1,69 @@ +set hive.archive.enabled = true; +set hive.enforce.bucketing = true; + +-- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + +drop table tstsrc; +drop table tstsrcpart; + +create table tstsrc like src; +insert overwrite table tstsrc select key, value from src; + +create table tstsrcpart (key string, value string) partitioned by (ds string, hr string) clustered by (key) into 10 buckets; + +insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='11') +select key, value from srcpart where ds='2008-04-08' and hr='11'; + +insert overwrite table tstsrcpart partition (ds='2008-04-08', hr='12') +select key, value from srcpart where ds='2008-04-08' and hr='12'; + +insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='11') +select key, value from srcpart where ds='2008-04-09' and hr='11'; + +insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') +select key, value from srcpart where ds='2008-04-09' and hr='12'; + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; + +ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); + +SELECT 
SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; + +SELECT key, count(1) FROM tstsrcpart WHERE ds='2008-04-08' AND hr='12' AND key='0' GROUP BY key; + +SELECT * FROM tstsrcpart a JOIN tstsrc b ON a.key=b.key +WHERE a.ds='2008-04-08' AND a.hr='12' AND a.key='0'; + +ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); + +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; + +CREATE TABLE harbucket(key INT) +PARTITIONED by (ds STRING) +CLUSTERED BY (key) INTO 10 BUCKETS; + +INSERT OVERWRITE TABLE harbucket PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; + +SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; +ALTER TABLE tstsrcpart ARCHIVE PARTITION (ds='2008-04-08', hr='12'); +SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; +ALTER TABLE tstsrcpart UNARCHIVE PARTITION (ds='2008-04-08', hr='12'); +SELECT key FROM harbucket TABLESAMPLE(BUCKET 1 OUT OF 10) SORT BY key; + + +CREATE TABLE old_name(key INT) +PARTITIONED by (ds STRING); + +INSERT OVERWRITE TABLE old_name PARTITION(ds='1') SELECT CAST(key AS INT) AS a FROM tstsrc WHERE key < 50; +ALTER TABLE old_name ARCHIVE PARTITION (ds='1'); +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM old_name WHERE ds='1') subq1) subq2; +ALTER TABLE old_name RENAME TO new_name; +SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col +FROM (SELECT * FROM new_name WHERE ds='1') subq1) subq2; + +drop table tstsrc; +drop table tstsrcpart; Index: ql/src/test/queries/clientpositive/archive.q =================================================================== --- ql/src/test/queries/clientpositive/archive.q (revision 1379535) +++ ql/src/test/queries/clientpositive/archive.q (working copy) @@ -1,6 +1,8 @@ set 
hive.archive.enabled = true; set hive.enforce.bucketing = true; +-- INCLUDE_HADOOP_MAJOR_VERSIONS(0.20) + drop table tstsrc; drop table tstsrcpart; @@ -21,8 +23,6 @@ insert overwrite table tstsrcpart partition (ds='2008-04-09', hr='12') select key, value from srcpart where ds='2008-04-09' and hr='12'; --- EXCLUDE_HADOOP_MAJOR_VERSIONS(0.17, 0.18, 0.19) - SELECT SUM(hash(col)) FROM (SELECT transform(*) using 'tr "\t" "_"' AS col FROM (SELECT * FROM tstsrcpart WHERE ds='2008-04-08') subq1) subq2; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java (revision 1379535) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ArchiveUtils.java (working copy) @@ -39,6 +39,7 @@ import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.metadata.Partition; import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.shims.HadoopShims; /** * ArchiveUtils. @@ -129,7 +130,6 @@ * HarPathHelper helps to create har:/ URIs for locations inside of archive. */ public static class HarPathHelper { - boolean parentSettable; private final URI base, originalBase; /** @@ -138,12 +138,11 @@ * @param originalBase directory for which Hadoop archive was created */ public HarPathHelper(HiveConf hconf, URI archive, URI originalBase) throws HiveException { - parentSettable = hconf.getBoolVar(HiveConf.ConfVars.HIVEHARPARENTDIRSETTABLE); this.originalBase = addSlash(originalBase); String parentHost = archive.getHost(); String harHost = null; if (parentHost == null) { - harHost = archive.getScheme(); + harHost = archive.getScheme() + "-localhost"; } else { harHost = archive.getScheme() + "-" + parentHost; } @@ -164,44 +163,15 @@ } } - /** - * Creates har URI for file/directory that was put there when creating HAR. 
- * - * With older versions of Hadoop, archiving a directory would produce - * the same directory structure, reflecting absoulute paths. - * If you created myArchive.har of /tmp/myDir the files in /tmp/myDir - * will be located under myArchive.har/tmp/myDir/* - * - * With newer versions, the parent directory can be specified. Assuming - * the parent directory was set to /tmp/myDir when creating the archive, - * the files can be found under myArchive.har/* - * - * This is why originalBase is argument - with new versions we can - * relativize URI, in older we keep absolute one. - * - * @param original file/directory path - * @return absolute HAR uri - */ - public URI getHarUri(URI original) throws HiveException { - URI relative = null; - if (!parentSettable) { - String dirInArchive = original.getPath(); - if(dirInArchive.length() > 1 && dirInArchive.charAt(0)=='/') { - dirInArchive = dirInArchive.substring(1); - } - try { - relative = new URI(null, null, dirInArchive, null); - } catch (URISyntaxException e) { - throw new HiveException("Couldn't create har URI for location"); - } // relative URI with path only + public URI getHarUri(URI original, HadoopShims shim) throws HiveException { + URI harUri = null; + try { + harUri = shim.getHarUri(original, base, originalBase); + } catch (URISyntaxException e) { + throw new HiveException("Couldn't create har URI for location", e); } - else { - relative = originalBase.relativize(original); - if(relative.isAbsolute()) { - throw new HiveException("Unable to relativize URI"); - } - } - return base.resolve(relative); + + return harUri; } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (revision 1379535) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DDLTask.java (working copy) @@ -1324,6 +1324,7 @@ // ARCHIVE_INTERMEDIATE_DIR_SUFFIX that's the same level as the partition, // if it does 
not already exist. If it does exist, we assume the dir is good // to use as the move operation that created it is atomic. + HadoopShims shim = ShimLoader.getHadoopShims(); if (!pathExists(intermediateArchivedDir) && !pathExists(intermediateOriginalDir)) { @@ -1338,7 +1339,6 @@ console.printInfo("Please wait... (this may take a while)"); // Create the Hadoop archive - HadoopShims shim = ShimLoader.getHadoopShims(); int ret=0; try { int maxJobNameLen = conf.getIntVar(HiveConf.ConfVars.HIVEJOBNAMELENGTH); @@ -1353,6 +1353,7 @@ if (ret != 0) { throw new HiveException("Error while creating HAR"); } + // Move from the tmp dir to an intermediate directory, in the same level as // the partition directory. e.g. .../hr=12-intermediate-archived try { @@ -1406,7 +1407,7 @@ try { for(Partition p: partitions) { URI originalPartitionUri = ArchiveUtils.addSlash(p.getPartitionPath().toUri()); - URI harPartitionDir = harHelper.getHarUri(originalPartitionUri); + URI harPartitionDir = harHelper.getHarUri(originalPartitionUri, shim); Path harPath = new Path(harPartitionDir.getScheme(), harPartitionDir.getAuthority(), harPartitionDir.getPath()); // make in Path to ensure no slash at the end @@ -1508,7 +1509,8 @@ URI archiveUri = archivePath.toUri(); ArchiveUtils.HarPathHelper harHelper = new ArchiveUtils.HarPathHelper(conf, archiveUri, originalUri); - URI sourceUri = harHelper.getHarUri(originalUri); + HadoopShims shim = ShimLoader.getHadoopShims(); + URI sourceUri = harHelper.getHarUri(originalUri, shim); Path sourceDir = new Path(sourceUri.getScheme(), sourceUri.getAuthority(), sourceUri.getPath()); if(!pathExists(intermediateArchivedDir) && !pathExists(archivePath)) {