Index: ql/src/java/org/apache/hadoop/hive/ql/Driver.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/Driver.java	(revision 986571)
+++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java	(working copy)
@@ -53,6 +53,7 @@
 import org.apache.hadoop.hive.ql.history.HiveHistory.Keys;
 import org.apache.hadoop.hive.ql.hooks.PostExecute;
 import org.apache.hadoop.hive.ql.hooks.PreExecute;
+import org.apache.hadoop.hive.ql.io.IOPrepareCache;
 import org.apache.hadoop.hive.ql.parse.ASTNode;
 import org.apache.hadoop.hive.ql.parse.BaseSemanticAnalyzer;
 import org.apache.hadoop.hive.ql.parse.ErrorMsg;
@@ -462,6 +463,8 @@
     Map<TaskResult, TaskRunner> running = new HashMap<TaskResult, TaskRunner>();
     DriverContext driverCxt = new DriverContext(runnable, ctx);
 
+    IOPrepareCache ioPrepareCache = IOPrepareCache.get();
+    ioPrepareCache.clear();
 
     // Add root Tasks to runnable
Index: ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java	(revision 986571)
+++ ql/src/java/org/apache/hadoop/hive/ql/DriverContext.java	(working copy)
@@ -22,7 +22,6 @@
 import java.util.LinkedList;
 import java.util.Queue;
 
-import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.Task;
 import org.apache.hadoop.mapred.JobConf;
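[Reviewer note] The two hunks above make Driver reset the per-thread IOPrepareCache at the start of every query execution, so path-to-PartitionDesc lookups cached while running one query can never leak into the next query on the same thread. A minimal sketch of that clear-then-reuse contract, assuming hive-exec on the classpath; runQuery is a hypothetical stand-in for Driver.execute():

    import org.apache.hadoop.hive.ql.io.IOPrepareCache;

    public class CacheLifecycleSketch {
      // Hypothetical stand-in for the start of Driver.execute().
      static void runQuery(String queryId) {
        IOPrepareCache cache = IOPrepareCache.get(); // same instance per thread
        cache.clear();                               // drop state from the prior query
        int size = cache.allocatePartitionDescMap().size();
        System.out.println(queryId + " starts with " + size + " cached entries"); // always 0
      }

      public static void main(String[] args) {
        runQuery("query-1");
        runQuery("query-2"); // sees an empty cache again
      }
    }
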
Index: ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java	(revision 986571)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/CombineHiveInputFormat.java	(working copy)
@@ -97,7 +97,9 @@
       // CombinedSplit.
       Path[] ipaths = inputSplitShim.getPaths();
       if (ipaths.length > 0) {
-        PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, ipaths[0]);
+        PartitionDesc part = HiveFileFormatUtils
+            .getPartitionDescFromPathRecursively(pathToPartitionInfo,
+                ipaths[0], IOPrepareCache.get().getPartitionDescMap());
         inputFormatClassName = part.getInputFileFormatClass().getName();
       }
     }
@@ -198,8 +200,8 @@
       // extract all the inputFormatClass names for each chunk in the
       // CombinedSplit.
-      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo,
-          inputSplitShim.getPath(0));
+      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(pathToPartitionInfo,
+          inputSplitShim.getPath(0), IOPrepareCache.get().getPartitionDescMap());
 
       // create a new InputFormat instance if this is the first time to see
       // this class
@@ -209,7 +211,7 @@
       out.writeUTF(inputFormatClassName);
     }
   }
-  
+
   /**
   * Create Hive splits based on CombineFileSplit.
   */
@@ -235,7 +237,8 @@
     Set<Path> poolSet = new HashSet<Path>();
 
     for (Path path : paths) {
-      PartitionDesc part = getPartitionDescFromPath(pathToPartitionInfo, path);
+      PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
+          pathToPartitionInfo, path, IOPrepareCache.get().allocatePartitionDescMap());
       TableDesc tableDesc = part.getTableDesc();
       if ((tableDesc != null) && tableDesc.isNonNative()) {
         return super.getSplits(job, numSplits);
@@ -346,64 +349,6 @@
         CombineHiveRecordReader.class);
   }
 
-  protected static PartitionDesc getPartitionDescFromPath(
-      Map<String, PartitionDesc> pathToPartitionInfo, Path dir) throws IOException {
-
-    // We first do exact match, and then do prefix matching. The latter is due to input dir
-    // could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of partition
-    String dirPath = dir.toUri().getPath();
-    PartitionDesc part = pathToPartitionInfo.get(dir.toString());
-    if (part == null) {
-      // LOG.warn("exact match not found, try ripping input path's theme and authority");
-      part = pathToPartitionInfo.get(dirPath);
-    }
-    if (part == null) {
-
-      // LOG.warn("still does not found just the path part: " + dirPath + " in pathToPartitionInfo."
-      //     + " Will try prefix matching");
-      for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
-        String keyPath = entry.getKey();
-        String dirStr = dir.toString();
-        // keyPath could start with hdfs:// or not, so we need to match both cases.
-        if (dirStr.startsWith(keyPath)) {
-          part = entry.getValue();
-          break;
-        } else {
-          Path p = new Path(keyPath);
-          String newP = p.toUri().getPath().toString();
-          if (dirStr.startsWith(newP)) {
-            part = entry.getValue();
-            break;
-          }
-          // This case handles the situation where dir is a fully qualified
-          // subdirectory of a path in pathToPartitionInfo. e.g.
-          // dir = hdfs://host:9000/user/warehouse/tableName/abc
-          // pathToPartitionInfo = {/user/warehouse/tableName : myPart}
-          // In such a case, just compare the path components.
-
-          // This could result in aliasing if we have a case where
-          // two entries in pathToPartitionInfo differ only by scheme
-          // or authority, but this problem exists anyway in the above checks.
-
-          // This check was precipitated by changes that allow recursive dirs
-          // in the input path, and an upcoming change to CombineFileInputFormat
-          // where the paths in splits no longer have the scheme and authority
-          // stripped out.
-          if (dirPath.startsWith(newP)) {
-            part = entry.getValue();
-            break;
-          }
-        }
-      }
-    }
-    if (part != null) {
-      return part;
-    } else {
-      throw new IOException("cannot find dir = " + dir.toString()
-          + " in partToPartitionInfo: " + pathToPartitionInfo.keySet());
-    }
-  }
-
   static class CombineFilter implements PathFilter {
     private final String pString;
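[Reviewer note] All three call sites in CombineHiveInputFormat (the split constructor, readFields, and getSplits) now delegate to the shared HiveFileFormatUtils.getPartitionDescFromPathRecursively. getSplits passes allocatePartitionDescMap(), which lazily creates the per-query cache, while the other two pass getPartitionDescMap(), which may still be null; the helper tolerates a null cacheMap and simply skips caching. A hedged usage sketch (the table paths are made up; assumes hive-exec on the classpath):

    import java.util.HashMap;
    import java.util.Map;

    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
    import org.apache.hadoop.hive.ql.io.IOPrepareCache;
    import org.apache.hadoop.hive.ql.plan.PartitionDesc;

    public class LookupSketch {
      public static void main(String[] args) throws Exception {
        Map<String, PartitionDesc> pathToPartitionInfo = new HashMap<String, PartitionDesc>();
        pathToPartitionInfo.put("/user/warehouse/t/ds=2010-08-03", new PartitionDesc());

        // A split path may point below the partition directory and carry a scheme;
        // the recursive lookup still resolves it to the partition's descriptor.
        PartitionDesc part = HiveFileFormatUtils.getPartitionDescFromPathRecursively(
            pathToPartitionInfo,
            new Path("hdfs:///user/warehouse/t/ds=2010-08-03/part-00000"),
            IOPrepareCache.get().allocatePartitionDescMap());
        System.out.println(part != null); // true
      }
    }
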
Index: ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java	(revision 986571)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/HiveFileFormatUtils.java	(working copy)
@@ -18,6 +18,7 @@
 
 package org.apache.hadoop.hive.ql.io;
 
+import java.io.File;
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.HashMap;
@@ -33,6 +34,7 @@
 import org.apache.hadoop.hive.ql.exec.Utilities;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.FileSinkDesc;
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.io.SequenceFile.CompressionType;
 import org.apache.hadoop.io.Writable;
@@ -242,6 +244,86 @@
     }
     return null;
   }
+
+  public static PartitionDesc getPartitionDescFromPathRecursively(
+      Map<String, PartitionDesc> pathToPartitionInfo, Path dir,
+      Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> cacheMap)
+      throws IOException {
+
+    PartitionDesc part = doGetPartitionDescFromPath(pathToPartitionInfo, dir);
+    if (part == null
+        && (dir.toUri().getScheme() == null || dir.toUri().getScheme().trim()
+            .equals(""))) {
+
+      Map<String, PartitionDesc> newPathToPartitionInfo = null;
+      if (cacheMap != null) {
+        newPathToPartitionInfo = cacheMap.get(pathToPartitionInfo);
+      }
+
+      if (newPathToPartitionInfo == null) { // still null
+        newPathToPartitionInfo = new HashMap<String, PartitionDesc>();
+        populateNewPartitionDesc(pathToPartitionInfo, newPathToPartitionInfo);
+
+        if (cacheMap != null) {
+          cacheMap.put(pathToPartitionInfo, newPathToPartitionInfo);
+        }
+      }
+      part = doGetPartitionDescFromPath(newPathToPartitionInfo, dir);
+    }
+
+    if (part != null) {
+      return part;
+    } else {
+      throw new IOException("cannot find dir = " + dir.toString()
+          + " in pathToPartitionInfo: " + pathToPartitionInfo.keySet());
+    }
+  }
+
+  private static void populateNewPartitionDesc(
+      Map<String, PartitionDesc> pathToPartitionInfo,
+      Map<String, PartitionDesc> newPathToPartitionInfo) {
+    for (Map.Entry<String, PartitionDesc> entry : pathToPartitionInfo.entrySet()) {
+      String entryKey = entry.getKey();
+      PartitionDesc partDesc = entry.getValue();
+      Path newP = new Path(entryKey);
+      String pathOnly = newP.toUri().getPath();
+      newPathToPartitionInfo.put(pathOnly, partDesc);
+    }
+  }
+
+  private static PartitionDesc doGetPartitionDescFromPath(
+      Map<String, PartitionDesc> pathToPartitionInfo, Path dir) {
+    // We first do an exact match, and then fall back to prefix matching, because the input dir
+    // could be /dir/ds='2001-02-21'/part-03 where part-03 is not part of the partition path
+    String dirPath = dir.toUri().getPath();
+    PartitionDesc part = pathToPartitionInfo.get(dir.toString());
+    if (part == null) {
+      // LOG.warn("exact match not found, try stripping input path's scheme and authority");
+      part = pathToPartitionInfo.get(dirPath);
+    }
+
+    if (part == null) {
+      String dirStr = dir.toString();
+      int dirPathIndex = dirPath.lastIndexOf(File.separator);
+      int dirStrIndex = dirStr.lastIndexOf(File.separator);
+      while (dirPathIndex >= 0 && dirStrIndex >= 0) {
+        dirStr = dirStr.substring(0, dirStrIndex);
+        dirPath = dirPath.substring(0, dirPathIndex);
+        // first try a full match
+        part = pathToPartitionInfo.get(dirStr);
+        if (part == null) {
+          // LOG.warn("exact match not found, try stripping input path's scheme and authority");
+          part = pathToPartitionInfo.get(dirPath);
+        }
+        if (part != null) {
+          break;
+        }
+        dirPathIndex = dirPath.lastIndexOf(File.separator);
+        dirStrIndex = dirStr.lastIndexOf(File.separator);
      }
+    }
+    return part;
+  }
 
   private HiveFileFormatUtils() {
     // prevent instantiation
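[Reviewer note] The lookup strategy above is: try the dir exactly as given, then with its scheme and authority stripped; if both fail, walk up the directory tree one component at a time, trying both forms at each level; finally, if the dir itself carries no scheme, rebuild pathToPartitionInfo keyed by path-only strings (cached per query in cacheMap, keyed by the original map) and repeat the lookup. A self-contained sketch of just the parent-directory walk, using plain strings instead of Hive types:

    import java.util.HashMap;
    import java.util.Map;

    public class ParentWalkSketch {
      // Simplified version of doGetPartitionDescFromPath's trimming loop.
      static String lookup(Map<String, String> pathToPart, String dir) {
        String part = pathToPart.get(dir);
        int i = dir.lastIndexOf('/');
        while (part == null && i > 0) { // drop one path component per iteration
          dir = dir.substring(0, i);
          part = pathToPart.get(dir);
          i = dir.lastIndexOf('/');
        }
        return part;
      }

      public static void main(String[] args) {
        Map<String, String> pathToPart = new HashMap<String, String>();
        pathToPart.put("/tbl/ds=2001-02-21", "partDesc_1");
        // A file under the partition directory resolves to the partition's entry.
        System.out.println(lookup(pathToPart, "/tbl/ds=2001-02-21/part-03")); // partDesc_1
        System.out.println(lookup(pathToPart, "/other/dir"));                 // null
      }
    }
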
Index: ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/io/IOPrepareCache.java	(revision 0)
@@ -0,0 +1,69 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.io;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.plan.PartitionDesc;
+
+/**
+ * IOPrepareCache is used to cache pre-query io-related objects.
+ * It should be cleared every time a new query is issued.
+ *
+ */
+public class IOPrepareCache {
+
+  private static ThreadLocal<IOPrepareCache> threadLocalIOPrepareCache = new ThreadLocal<IOPrepareCache>();
+
+  public static IOPrepareCache get() {
+    IOPrepareCache cache = IOPrepareCache.threadLocalIOPrepareCache.get();
+    if (cache == null) {
+      threadLocalIOPrepareCache.set(new IOPrepareCache());
+      cache = IOPrepareCache.threadLocalIOPrepareCache.get();
+    }
+
+    return cache;
+  }
+
+  public void clear() {
+    if (partitionDescMap != null) {
+      partitionDescMap.clear();
+    }
+  }
+
+  private Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> partitionDescMap;
+
+  public Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> allocatePartitionDescMap() {
+    if (partitionDescMap == null) {
+      partitionDescMap = new HashMap<Map<String, PartitionDesc>, Map<String, PartitionDesc>>();
+    }
+    return partitionDescMap;
+  }
+
+  public Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> getPartitionDescMap() {
+    return partitionDescMap;
+  }
+
+  public void setPartitionDescMap(
+      Map<Map<String, PartitionDesc>, Map<String, PartitionDesc>> partitionDescMap) {
+    this.partitionDescMap = partitionDescMap;
+  }
+
+}
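[Reviewer note] IOPrepareCache hands out one instance per thread via ThreadLocal, so concurrent sessions in the same JVM never share or lock the partition-desc cache; the clear() call added to Driver then scopes the cached entries to a single query. A small sketch demonstrating the per-thread isolation (assumes hive-exec on the classpath):

    import org.apache.hadoop.hive.ql.io.IOPrepareCache;

    public class ThreadLocalSketch {
      public static void main(String[] args) throws InterruptedException {
        final IOPrepareCache[] seen = new IOPrepareCache[2];
        Thread t1 = new Thread(new Runnable() {
          public void run() { seen[0] = IOPrepareCache.get(); }
        });
        Thread t2 = new Thread(new Runnable() {
          public void run() { seen[1] = IOPrepareCache.get(); }
        });
        t1.start(); t2.start();
        t1.join(); t2.join();
        System.out.println("distinct per-thread caches: " + (seen[0] != seen[1])); // true
      }
    }
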
+ */ + +package org.apache.hadoop.hive.ql.io; + +import java.io.IOException; +import java.util.HashMap; +import java.util.Map; + +import junit.framework.TestCase; + +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.plan.PartitionDesc; + +public class TestHiveFileFormatUtils extends TestCase { + + public void testGetPartitionDescFromPathRecursively() throws IOException { + + PartitionDesc partDesc_3 = new PartitionDesc(); + PartitionDesc partDesc_4 = new PartitionDesc(); + PartitionDesc partDesc_5 = new PartitionDesc(); + PartitionDesc partDesc_6 = new PartitionDesc(); + + Map pathToPartitionInfo = new HashMap(); + + pathToPartitionInfo.put( + new Path("file:///tbl/par1/part2/part3").toString(), partDesc_3); + pathToPartitionInfo.put(new Path("/tbl/par1/part2/part4").toString(), + partDesc_4); + pathToPartitionInfo.put(new Path("/tbl/par1/part2/part5/").toString(), + partDesc_5); + pathToPartitionInfo.put(new Path("hdfs:///tbl/par1/part2/part6/") + .toString(), partDesc_6); + + // first group + PartitionDesc ret = null; + + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("file:///tbl/par1/part2/part3"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("file:///tbl/par1/part2/part3 not found.", partDesc_3, ret); + + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("/tbl/par1/part2/part3"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("/tbl/par1/part2/part3 not found.", partDesc_3, ret); + + boolean exception = false; + try { + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part3"), + IOPrepareCache.get().allocatePartitionDescMap()); + } catch (IOException e) { + exception = true; + } + assertEquals("hdfs:///tbl/par1/part2/part3 should return null", true, + exception); + exception = false; + + // second group + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("file:///tbl/par1/part2/part4"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("file:///tbl/par1/part2/part4 not found.", partDesc_4, ret); + + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("/tbl/par1/part2/part4"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("/tbl/par1/part2/part4 not found.", partDesc_4, ret); + + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part4"), + IOPrepareCache.get().allocatePartitionDescMap()); + + assertEquals("hdfs:///tbl/par1/part2/part4 should not found", partDesc_4, + ret); + + // third group + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("file:///tbl/par1/part2/part5"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("file:///tbl/par1/part2/part5 not found.", partDesc_5, ret); + + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("/tbl/par1/part2/part5"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("/tbl/par1/part2/part5 not found.", partDesc_5, ret); + + ret = HiveFileFormatUtils.getPartitionDescFromPathRecursively( + pathToPartitionInfo, new Path("hdfs:///tbl/par1/part2/part5"), + IOPrepareCache.get().allocatePartitionDescMap()); + assertEquals("hdfs:///tbl/par1/part2/part5 not found", partDesc_5, ret); + + // fourth group + try { + ret = 
Index: ql/src/test/queries/clientpositive/combine3.q
===================================================================
--- ql/src/test/queries/clientpositive/combine3.q	(revision 0)
+++ ql/src/test/queries/clientpositive/combine3.q	(revision 0)
@@ -0,0 +1,41 @@
+set hive.exec.compress.output = true;
+set hive.input.format=org.apache.hadoop.hive.ql.io.CombineHiveInputFormat;
+set mapred.min.split.size=256;
+set mapred.min.split.size.per.node=256;
+set mapred.min.split.size.per.rack=256;
+set mapred.max.split.size=256;
+
+
+drop table combine_3_srcpart_seq_rc;
+
+create table combine_3_srcpart_seq_rc (key int , value string) partitioned by (ds string, hr string) stored as sequencefile;
+
+insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="00") select * from src;
+
+alter table combine_3_srcpart_seq_rc set fileformat rcfile;
+insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="001") select * from src;
+
+desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="00");
+desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="001");
+
+select key, value, ds, hr from combine_3_srcpart_seq_rc where ds="2010-08-03" order by key, hr limit 30;
+
+set hive.enforce.bucketing = true;
+set hive.exec.reducers.max = 1;
+
+drop table bucket3_1;
+CREATE TABLE combine_3_srcpart_seq_rc_bucket(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS stored as sequencefile;
+
+insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='1')
+select * from src;
+
+alter table combine_3_srcpart_seq_rc_bucket set fileformat rcfile;
+
+insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='11')
+select * from src;
+
+select key, ds from combine_3_srcpart_seq_rc_bucket tablesample (bucket 1 out of 2) s where ds = '1' or ds= '11' order by key, ds limit 30;
+
+drop table combine_3_srcpart_seq_rc_bucket;
+
+drop table combine_3_srcpart_seq_rc;
Index: ql/src/test/results/clientpositive/combine3.q.out
===================================================================
--- ql/src/test/results/clientpositive/combine3.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/combine3.q.out	(revision 0)
@@ -0,0 +1,258 @@
+PREHOOK: query: drop table combine_3_srcpart_seq_rc
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table combine_3_srcpart_seq_rc
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table combine_3_srcpart_seq_rc (key int , value string) partitioned by (ds string, hr string) stored as sequencefile
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table combine_3_srcpart_seq_rc (key int , value string) partitioned by (ds string, hr
string) stored as sequencefile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@combine_3_srcpart_seq_rc +PREHOOK: query: insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="00") select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=00 +POSTHOOK: query: insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="00") select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=00 +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table combine_3_srcpart_seq_rc set fileformat rcfile +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@combine_3_srcpart_seq_rc +PREHOOK: Output: default@combine_3_srcpart_seq_rc +POSTHOOK: query: alter table combine_3_srcpart_seq_rc set fileformat rcfile +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@combine_3_srcpart_seq_rc +POSTHOOK: Output: default@combine_3_srcpart_seq_rc +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="001") select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=001 +POSTHOOK: query: insert overwrite table combine_3_srcpart_seq_rc partition (ds="2010-08-03", hr="001") select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=001 +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="00") +PREHOOK: type: DESCTABLE +POSTHOOK: query: desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="00") +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc 
PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +key int +value string +ds string +hr string + +Detailed Partition Information Partition(values:[2010-08-03, 00], dbName:default, tableName:combine_3_srcpart_seq_rc, createTime:1282111568, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:pfile:/Users/heyongqiang/Documents/workspace/Hive-Index/build/ql/test/data/warehouse/combine_3_srcpart_seq_rc/ds=2010-08-03/hr=00, inputFormat:org.apache.hadoop.mapred.SequenceFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), parameters:{transient_lastDdlTime=1282111568}) +PREHOOK: query: desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="001") +PREHOOK: type: DESCTABLE +POSTHOOK: query: desc extended combine_3_srcpart_seq_rc partition(ds="2010-08-03", hr="001") +POSTHOOK: type: DESCTABLE +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +key int +value string +ds string +hr string + +Detailed Partition Information Partition(values:[2010-08-03, 001], dbName:default, tableName:combine_3_srcpart_seq_rc, createTime:1282111572, lastAccessTime:0, sd:StorageDescriptor(cols:[FieldSchema(name:key, type:int, comment:null), FieldSchema(name:value, type:string, comment:null)], location:pfile:/Users/heyongqiang/Documents/workspace/Hive-Index/build/ql/test/data/warehouse/combine_3_srcpart_seq_rc/ds=2010-08-03/hr=001, inputFormat:org.apache.hadoop.hive.ql.io.RCFileInputFormat, outputFormat:org.apache.hadoop.hive.ql.io.RCFileOutputFormat, compressed:false, numBuckets:-1, serdeInfo:SerDeInfo(name:null, serializationLib:org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe, parameters:{serialization.format=1}), bucketCols:[], sortCols:[], parameters:{}), parameters:{transient_lastDdlTime=1282111572}) +PREHOOK: query: select key, value, ds, hr from combine_3_srcpart_seq_rc where ds="2010-08-03" order by key, hr limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=00 +PREHOOK: Input: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=001 +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-08-17_23-06-13_344_3763300927102183566/-mr-10000 +POSTHOOK: query: select key, value, ds, hr from combine_3_srcpart_seq_rc where ds="2010-08-03" order by key, hr limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=00 +POSTHOOK: Input: default@combine_3_srcpart_seq_rc@ds=2010-08-03/hr=001 +POSTHOOK: Output: 
file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-08-17_23-06-13_344_3763300927102183566/-mr-10000 +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 val_0 2010-08-03 00 +0 val_0 2010-08-03 00 +0 val_0 2010-08-03 00 +0 val_0 2010-08-03 001 +0 val_0 2010-08-03 001 +0 val_0 2010-08-03 001 +2 val_2 2010-08-03 00 +2 val_2 2010-08-03 001 +4 val_4 2010-08-03 00 +4 val_4 2010-08-03 001 +5 val_5 2010-08-03 00 +5 val_5 2010-08-03 00 +5 val_5 2010-08-03 00 +5 val_5 2010-08-03 001 +5 val_5 2010-08-03 001 +5 val_5 2010-08-03 001 +8 val_8 2010-08-03 00 +8 val_8 2010-08-03 001 +9 val_9 2010-08-03 00 +9 val_9 2010-08-03 001 +10 val_10 2010-08-03 00 +10 val_10 2010-08-03 001 +11 val_11 2010-08-03 00 +11 val_11 2010-08-03 001 +12 val_12 2010-08-03 00 +12 val_12 2010-08-03 00 +12 val_12 2010-08-03 001 +12 val_12 2010-08-03 001 +15 val_15 2010-08-03 00 +15 val_15 2010-08-03 00 +PREHOOK: query: drop table bucket3_1 +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table bucket3_1 +POSTHOOK: type: DROPTABLE +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: CREATE TABLE combine_3_srcpart_seq_rc_bucket(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS stored as sequencefile +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE combine_3_srcpart_seq_rc_bucket(key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS stored as sequencefile +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='1') +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine_3_srcpart_seq_rc_bucket@ds=1 +POSTHOOK: query: 
insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='1') +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine_3_srcpart_seq_rc_bucket@ds=1 +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: alter table combine_3_srcpart_seq_rc_bucket set fileformat rcfile +PREHOOK: type: ALTERTABLE_FILEFORMAT +PREHOOK: Input: default@combine_3_srcpart_seq_rc_bucket +PREHOOK: Output: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: query: alter table combine_3_srcpart_seq_rc_bucket set fileformat rcfile +POSTHOOK: type: ALTERTABLE_FILEFORMAT +POSTHOOK: Input: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: Output: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='11') +select * from src +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@combine_3_srcpart_seq_rc_bucket@ds=11 +POSTHOOK: query: insert overwrite table combine_3_srcpart_seq_rc_bucket partition (ds='11') +select * from src +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@combine_3_srcpart_seq_rc_bucket@ds=11 +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE 
[(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: select key, ds from combine_3_srcpart_seq_rc_bucket tablesample (bucket 1 out of 2) s where ds = '1' or ds= '11' order by key, ds limit 30 +PREHOOK: type: QUERY +PREHOOK: Input: default@combine_3_srcpart_seq_rc_bucket@ds=1 +PREHOOK: Input: default@combine_3_srcpart_seq_rc_bucket@ds=11 +PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-08-17_23-06-32_345_4538839581357678799/-mr-10000 +POSTHOOK: query: select key, ds from combine_3_srcpart_seq_rc_bucket tablesample (bucket 1 out of 2) s where ds = '1' or ds= '11' order by key, ds limit 30 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@combine_3_srcpart_seq_rc_bucket@ds=1 +POSTHOOK: Input: default@combine_3_srcpart_seq_rc_bucket@ds=11 +POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-08-17_23-06-32_345_4538839581357678799/-mr-10000 +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +0 1 +0 1 +0 1 +0 11 +0 11 +0 11 +2 1 +2 11 +4 1 +4 11 +8 1 +8 11 +10 1 +10 11 +12 1 +12 1 +12 11 +12 11 +18 1 +18 1 +18 11 +18 11 +20 1 +20 11 +24 1 +24 1 +24 11 +24 11 +26 1 +26 1 +PREHOOK: query: drop table combine_3_srcpart_seq_rc_bucket +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@combine_3_srcpart_seq_rc_bucket +PREHOOK: Output: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: query: drop table combine_3_srcpart_seq_rc_bucket +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: Output: default@combine_3_srcpart_seq_rc_bucket +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: 
combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +PREHOOK: query: drop table combine_3_srcpart_seq_rc +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@combine_3_srcpart_seq_rc +PREHOOK: Output: default@combine_3_srcpart_seq_rc +POSTHOOK: query: drop table combine_3_srcpart_seq_rc +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@combine_3_srcpart_seq_rc +POSTHOOK: Output: default@combine_3_srcpart_seq_rc +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=001).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc PARTITION(ds=2010-08-03,hr=00).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=11).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).key EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: combine_3_srcpart_seq_rc_bucket PARTITION(ds=1).value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]