Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 929232)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -219,6 +219,8 @@
     HIVEENFORCEBUCKETING("hive.enforce.bucketing", false),
     HIVEENFORCESORTING("hive.enforce.sorting", false),
     HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner"),
+
+    HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false),
 
     // Optimizer
     HIVEOPTCP("hive.optimize.cp", true), // column pruner
@@ -226,6 +228,7 @@
     HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
     HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
     HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
+    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
     ;
 
     public final String varname;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java	(revision 929232)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java	(working copy)
@@ -64,6 +64,9 @@
     }
     transformations.add(new UnionProcessor());
     transformations.add(new JoinReorder());
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
+      transformations.add(new ReduceSinkDeDuplication());
+    }
   }
 
   /**
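For orientation, every pass registered above implements the same small contract that the new ReduceSinkDeDuplication class (next hunk) fulfills. A minimal no-op sketch of that contract, assuming only types already referenced in this patch:

    import org.apache.hadoop.hive.ql.optimizer.Transform;
    import org.apache.hadoop.hive.ql.parse.ParseContext;
    import org.apache.hadoop.hive.ql.parse.SemanticException;

    // Sketch only: a Transform receives the ParseContext after earlier passes
    // have run, may rewrite the operator DAG it contains, and returns it.
    public class NoOpTransform implements Transform {
      @Override
      public ParseContext transform(ParseContext pctx) throws SemanticException {
        return pctx; // a real pass, such as ReduceSinkDeDuplication, mutates pctx
      }
    }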
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java	(revision 0)
@@ -0,0 +1,428 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.FilterOperator;
+import org.apache.hadoop.hive.ql.exec.ForwardOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+/**
+ * If two reduce sink operators share the same partition/sort columns, we
+ * should merge them. This should happen after map join optimization because
+ * map join optimization will remove reduce sink operators.
+ */
+public class ReduceSinkDeDuplication implements Transform {
+
+  protected ParseContext pGraphContext;
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    pGraphContext = pctx;
+
+    // set up a walker context that remembers reduce sinks already rejected
+    ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
+
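+    // The rule below is matched against the operator names on the walker's
+    // current path: "RS%.*RS%" fires for a ReduceSinkOperator that has
+    // another ReduceSinkOperator above it, with any operators in between.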
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", "RS%.*RS%"),
+        ReduceSinkDeduplicateProcFactory.getReducerReducerProc());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(ReduceSinkDeduplicateProcFactory
+        .getDefaultProc(), opRules, cppCtx);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of top op nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pGraphContext.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    return pGraphContext;
+  }
+
+  class ReduceSinkDeduplicateProcCtx implements NodeProcessorCtx {
+    ParseContext pctx;
+    List<ReduceSinkOperator> rejectedRSList;
+
+    public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
+      rejectedRSList = new ArrayList<ReduceSinkOperator>();
+      this.pctx = pctx;
+    }
+
+    public boolean contains(ReduceSinkOperator rsOp) {
+      return rejectedRSList.contains(rsOp);
+    }
+
+    public void addRejectedReduceSinkOperator(ReduceSinkOperator rsOp) {
+      if (!rejectedRSList.contains(rsOp)) {
+        rejectedRSList.add(rsOp);
+      }
+    }
+
+    public ParseContext getPctx() {
+      return pctx;
+    }
+
+    public void setPctx(ParseContext pctx) {
+      this.pctx = pctx;
+    }
+  }
+
+  static class ReduceSinkDeduplicateProcFactory {
+
+    public static NodeProcessor getReducerReducerProc() {
+      return new ReducerReducerProc();
+    }
+
+    public static NodeProcessor getDefaultProc() {
+      return new DefaultProc();
+    }
+
+    /*
+     * do nothing.
+     */
+    static class DefaultProc implements NodeProcessor {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        return null;
+      }
+    }
+
+    static class ReducerReducerProc implements NodeProcessor {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
+        ReduceSinkOperator childReduceSink = (ReduceSinkOperator) nd;
+
+        if (ctx.contains(childReduceSink)) {
+          return null;
+        }
+
+        ParseContext pGraphContext = ctx.getPctx();
+        HashMap<String, String> childColumnMapping =
+            getPartitionAndKeyColumnMapping(childReduceSink);
+        ReduceSinkOperator parentRS = findSingleParentReduceSink(childReduceSink, pGraphContext);
+        if (parentRS == null) {
+          ctx.addRejectedReduceSinkOperator(childReduceSink);
+          return null;
+        }
+        HashMap<String, String> parentColumnMapping = getPartitionAndKeyColumnMapping(parentRS);
+        Operator<? extends Serializable> stopBacktrackFlagOp = null;
+        if (parentRS.getParentOperators() == null
+            || parentRS.getParentOperators().size() == 0) {
+          stopBacktrackFlagOp = parentRS;
+        } else if (parentRS.getParentOperators().size() != 1) {
+          return null;
+        } else {
+          stopBacktrackFlagOp = parentRS.getParentOperators().get(0);
+        }
+
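+        // Rewrite both mappings in terms of the columns visible at
+        // stopBacktrackFlagOp, so the two reduce sinks' partition and key
+        // expressions can be compared against a common ancestor.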
+        boolean succeed = backTrackColumnNames(childColumnMapping, childReduceSink,
+            stopBacktrackFlagOp, pGraphContext);
+        if (!succeed) {
+          return null;
+        }
+        succeed = backTrackColumnNames(parentColumnMapping, parentRS,
+            stopBacktrackFlagOp, pGraphContext);
+        if (!succeed) {
+          return null;
+        }
+
+        boolean same = compareReduceSink(childReduceSink, parentRS,
+            childColumnMapping, parentColumnMapping);
+        if (!same) {
+          return null;
+        }
+        replaceReduceSinkWithSelectOperator(childReduceSink, pGraphContext);
+        return null;
+      }
+
+      private void replaceReduceSinkWithSelectOperator(
+          ReduceSinkOperator childReduceSink, ParseContext pGraphContext)
+          throws SemanticException {
+        List<Operator<? extends Serializable>> parentOp = childReduceSink.getParentOperators();
+        List<Operator<? extends Serializable>> childOp = childReduceSink.getChildOperators();
+
+        Operator<? extends Serializable> oldParent = childReduceSink;
+
+        if (childOp != null && childOp.size() == 1
+            && ((childOp.get(0)) instanceof ExtractOperator)) {
+          oldParent = childOp.get(0);
+          childOp = childOp.get(0).getChildOperators();
+        }
+
+        Operator<? extends Serializable> input = parentOp.get(0);
+        input.getChildOperators().clear();
+
+        RowResolver inputRR = pGraphContext.getOpParseCtx().get(input).getRR();
+
+        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
+        ArrayList<String> outputs = new ArrayList<String>();
+        List<String> outputCols = childReduceSink.getConf().getOutputValueColumnNames();
+        RowResolver outputRS = new RowResolver();
+
+        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+
+        for (int i = 0; i < outputCols.size(); i++) {
+          String internalName = outputCols.get(i);
+          String[] nm = inputRR.reverseLookup(internalName);
+          ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
+          ExprNodeDesc colDesc = childReduceSink.getConf().getValueCols().get(i);
+          exprs.add(colDesc);
+          outputs.add(internalName);
+          outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo
+              .getType(), nm[0], valueInfo.getIsPartitionCol()));
+          colExprMap.put(internalName, colDesc);
+        }
+
+        SelectDesc select = new SelectDesc(exprs, outputs, false);
+
+        SelectOperator sel = (SelectOperator) putOpInsertMap(
+            OperatorFactory.getAndMakeChild(select, new RowSchema(inputRR
+                .getColumnInfos()), input), inputRR, pGraphContext);
+
+        sel.setColumnExprMap(colExprMap);
+
+        // Insert the select operator in between.
+        sel.setChildOperators(childOp);
+        for (Operator<? extends Serializable> ch : childOp) {
+          ch.replaceParent(oldParent, sel);
+        }
+      }
+
+      private Operator<? extends Serializable> putOpInsertMap(
+          Operator<? extends Serializable> op, RowResolver rr, ParseContext pGraphContext) {
+        OpParseContext ctx = new OpParseContext(rr);
+        pGraphContext.getOpParseCtx().put(op, ctx);
+        return op;
+      }
+
+      private boolean compareReduceSink(ReduceSinkOperator childReduceSink,
+          ReduceSinkOperator parentRS,
+          HashMap<String, String> childColumnMapping,
+          HashMap<String, String> parentColumnMapping) {
+
+        ArrayList<ExprNodeDesc> childPartitionCols = childReduceSink.getConf().getPartitionCols();
+        ArrayList<ExprNodeDesc> parentPartitionCols = parentRS.getConf().getPartitionCols();
+
+        boolean ret = compareExprNodes(childColumnMapping, parentColumnMapping,
+            childPartitionCols, parentPartitionCols);
+        if (!ret) {
+          return false;
+        }
+
+        ArrayList<ExprNodeDesc> childReduceKeyCols = childReduceSink.getConf().getKeyCols();
+        ArrayList<ExprNodeDesc> parentReduceKeyCols = parentRS.getConf().getKeyCols();
+        ret = compareExprNodes(childColumnMapping, parentColumnMapping,
+            childReduceKeyCols, parentReduceKeyCols);
+        if (!ret) {
+          return false;
+        }
+
+        String childRSOrder = childReduceSink.getConf().getOrder();
+        String parentRSOrder = parentRS.getConf().getOrder();
+        if (childRSOrder != null && !(childRSOrder.trim().equals(""))) {
+          if (parentRSOrder == null
+              || !childRSOrder.trim().equals(parentRSOrder.trim())) {
+            return false;
+          }
+        } else {
+          if (parentRSOrder == null || parentRSOrder.trim().equals("")) {
+            parentRS.getConf().setOrder(childRSOrder);
+          }
+        }
+
+        return true;
+      }
+
+      private boolean compareExprNodes(HashMap<String, String> childColumnMapping,
+          HashMap<String, String> parentColumnMapping,
+          ArrayList<ExprNodeDesc> childColExprs,
+          ArrayList<ExprNodeDesc> parentColExprs) {
+
+        boolean childEmpty = childColExprs == null || childColExprs.size() == 0;
+        boolean parentEmpty = parentColExprs == null || parentColExprs.size() == 0;
+
+        if (childEmpty) { // both empty
+          return true;
+        }
+
+        // child not empty here
+        if (parentEmpty) { // child not empty, but parent empty
+          return false;
+        }
+
+        if (childColExprs.size() != parentColExprs.size()) {
+          return false;
+        }
+        int i = 0;
+        while (i < childColExprs.size()) {
+          ExprNodeDesc childExpr = childColExprs.get(i);
+          ExprNodeDesc parentExpr = parentColExprs.get(i);
+
+          if ((childExpr instanceof ExprNodeColumnDesc)
+              && (parentExpr instanceof ExprNodeColumnDesc)) {
+            String childCol = childColumnMapping
+                .get(((ExprNodeColumnDesc) childExpr).getColumn());
+            String parentCol = parentColumnMapping
+                .get(((ExprNodeColumnDesc) parentExpr).getColumn());
+
+            if (!childCol.equals(parentCol)) {
+              return false;
+            }
+          } else {
+            return false;
+          }
+          i++;
+        }
+        return true;
+      }
+
+      /*
+       * Back track column names to find their corresponding original column
+       * names. Only allow simple operators like 'select column' or filter.
+       */
+      private boolean backTrackColumnNames(
+          HashMap<String, String> columnMapping,
+          ReduceSinkOperator reduceSink,
+          Operator<? extends Serializable> stopBacktrackFlagOp, ParseContext pGraphContext) {
+        Operator<? extends Serializable> startOperator = reduceSink;
+        while (startOperator != null && startOperator != stopBacktrackFlagOp) {
+          startOperator = startOperator.getParentOperators().get(0);
+          Map<String, ExprNodeDesc> colExprMap = startOperator.getColumnExprMap();
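+          // Operators without a column-expression map (e.g. FilterOperator,
+          // ForwardOperator) pass columns through unchanged, so the current
+          // mapping still holds and the walk continues upward.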
+          if (colExprMap == null || colExprMap.size() == 0) {
+            continue;
+          }
+          Iterator<String> keyIter = columnMapping.keySet().iterator();
+          while (keyIter.hasNext()) {
+            String key = keyIter.next();
+            String oldCol = columnMapping.get(key);
+            ExprNodeDesc exprNode = colExprMap.get(oldCol);
+            if (exprNode instanceof ExprNodeColumnDesc) {
+              String col = ((ExprNodeColumnDesc) exprNode).getColumn();
+              columnMapping.put(key, col);
+            } else {
+              return false;
+            }
+          }
+        }
+
+        return true;
+      }
+
+      private HashMap<String, String> getPartitionAndKeyColumnMapping(ReduceSinkOperator reduceSink) {
+        HashMap<String, String> columnMapping = new HashMap<String, String>();
+        ReduceSinkDesc reduceSinkDesc = reduceSink.getConf();
+        ArrayList<ExprNodeDesc> partitionCols = reduceSinkDesc.getPartitionCols();
+        ArrayList<ExprNodeDesc> reduceKeyCols = reduceSinkDesc.getKeyCols();
+        if (partitionCols != null) {
+          for (ExprNodeDesc desc : partitionCols) {
+            List<String> cols = desc.getCols();
+            for (String col : cols) {
+              columnMapping.put(col, col);
+            }
+          }
+        }
+        if (reduceKeyCols != null) {
+          for (ExprNodeDesc desc : reduceKeyCols) {
+            List<String> cols = desc.getCols();
+            for (String col : cols) {
+              columnMapping.put(col, col);
+            }
+          }
+        }
+        return columnMapping;
+      }
+
+      private ReduceSinkOperator findSingleParentReduceSink(
+          ReduceSinkOperator childReduceSink, ParseContext pGraphContext) {
+        Operator<? extends Serializable> start = childReduceSink;
+        while (start != null) {
+          if (start.getParentOperators() == null
+              || start.getParentOperators().size() != 1) {
+            // this potentially is a join operator
+            return null;
+          }
+
+          boolean allowed = false;
+          if ((start instanceof SelectOperator)
+              || (start instanceof FilterOperator)
+              || (start instanceof ExtractOperator)
+              || (start instanceof ForwardOperator)
+              || (start instanceof ScriptOperator)
+              || (start instanceof ReduceSinkOperator)) {
+            allowed = true;
+          }
+
+          if (!allowed) {
+            return null;
+          }
+
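+          // A user script can repartition or reorder its input arbitrarily,
+          // so a ScriptOperator is only looked through when
+          // hive.exec.script.trust asserts that scripts preserve the
+          // partitioning and sort order of their input.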
+          if ((start instanceof ScriptOperator)
+              && !HiveConf.getBoolVar(pGraphContext.getConf(),
+                  HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST)) {
+            return null;
+          }
+
+          start = start.getParentOperators().get(0);
+          if (start instanceof ReduceSinkOperator) {
+            return (ReduceSinkOperator) start;
+          }
+        }
+        return null;
+      }
+    }
+
+  }
+}
Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java	(revision 929232)
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ReduceSinkDesc.java	(working copy)
@@ -177,5 +177,11 @@
     return keySerializeInfo.getProperties().getProperty(
         org.apache.hadoop.hive.serde.Constants.SERIALIZATION_SORT_ORDER);
   }
+
+  public void setOrder(String orderStr) {
+    keySerializeInfo.getProperties().setProperty(
+        org.apache.hadoop.hive.serde.Constants.SERIALIZATION_SORT_ORDER,
+        orderStr);
+  }
 }
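A note on the order string behind setOrder (annotation, not part of the patch): Hive encodes the sort direction of the reduce keys in the SERIALIZATION_SORT_ORDER property as one character per key column, '+' for ascending and '-' for descending; the explain plans below print it as "sort order: +". compareReduceSink above uses the new setter to push the child's order onto a parent that has none. A minimal sketch of that rule, assuming a hypothetical desc with two key columns:

    import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;

    // Sketch only: propagate the child's per-key order string ("+-" would mean
    // key0 ascending, key1 descending) to a parent with no explicit order.
    void propagateOrder(ReduceSinkDesc parent, ReduceSinkDesc child) {
      String parentOrder = parent.getOrder();
      if (parentOrder == null || parentOrder.trim().equals("")) {
        parent.setOrder(child.getOrder()); // setter added by this patch
      }
    }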
Index: ql/src/test/queries/clientpositive/reduce_deduplicate.q
===================================================================
--- ql/src/test/queries/clientpositive/reduce_deduplicate.q	(revision 0)
+++ ql/src/test/queries/clientpositive/reduce_deduplicate.q	(revision 0)
@@ -0,0 +1,42 @@
+set hive.enforce.bucketing = true;
+set hive.exec.reducers.max = 1;
+set hive.exec.script.trust = true;
+
+drop table bucket5_1;
+
+CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
+explain extended
+insert overwrite table bucket5_1
+select * from src cluster by key;
+
+insert overwrite table bucket5_1
+select * from src cluster by key;
+
+select * from bucket5_1 order by key;
+
+drop table complex_tbl_1;
+create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string);
+
+drop table complex_tbl_2;
+create table complex_tbl_2(aet string, aes string) partitioned by (ds string);
+
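+-- Both TRANSFORM ('cat') stages below run around a single cluster-by. With
+-- hive.exec.script.trust set, the optimizer may look through the script
+-- operators when deciding whether adjacent reduce sinks can be merged.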
+explain extended
+insert overwrite table complex_tbl_1 partition (ds='2010-03-29')
+select s2.* from
+(
+  select TRANSFORM (aid,bid,t,ctime,etime,l,et)
+  USING 'cat'
+  AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+  from
+  (
+    select transform(aet,aes)
+    using 'cat'
+    as (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+    from complex_tbl_2 where ds ='2010-03-29' cluster by bid
+)s
+)s2;
+
+drop table complex_tbl_2;
+drop table complex_tbl_1;
+drop table bucket5_1;
Index: ql/src/test/results/clientpositive/reduce_deduplicate.q.out
===================================================================
--- ql/src/test/results/clientpositive/reduce_deduplicate.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/reduce_deduplicate.q.out	(revision 0)
@@ -0,0 +1,902 @@
+PREHOOK: query: drop table bucket5_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket5_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bucket5_1
+PREHOOK: query: explain extended
+insert overwrite table bucket5_1
+select * from src cluster by key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+insert overwrite table bucket5_1
+select * from src cluster by key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucket5_1)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        src
+          TableScan
+            alias: src
+            Select Operator
+              expressions:
+                    expr: key
+                    type: string
+                    expr: value
+                    type: string
+              outputColumnNames: _col0, _col1
+              Reduce Output Operator
+                key expressions:
+                      expr: _col0
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: _col0
+                      type: string
+                tag: -1
+                value expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+      Needs Tagging: false
+      Path -> Alias:
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src [src]
+      Path -> Partition:
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
+          Partition
+            base file name: src
+            input format: org.apache.hadoop.mapred.TextInputFormat
+            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+            properties:
+              bucket_count -1
+              columns key,value
+              columns.types string:string
+              file.inputformat org.apache.hadoop.mapred.TextInputFormat
+              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
+              name src
+              serialization.ddl struct src { string key, string value}
+              serialization.format 1
+              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              transient_lastDdlTime 1269926333
+            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns key,value
+                columns.types string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
+                name src
+                serialization.ddl struct src { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1269926333
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: src
+            name: src
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-18-54_583_5210238441426809761/10000
+              NumFilesPerFileSink: 2
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    bucket_count 2
+                    bucket_field_name key
+                    columns key,value
+                    columns.types string:string
+                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                    location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/bucket5_1
+                    name bucket5_1
+                    serialization.ddl struct bucket5_1 { string key, string value}
+                    serialization.format 1
+                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                    transient_lastDdlTime 1269926334
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                  name: bucket5_1
+              TotalFiles: 2
+              MultiFileSpray: true
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          source: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-18-54_583_5210238441426809761/10000
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count 2
+                bucket_field_name key
+                columns key,value
+                columns.types string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/bucket5_1
+                name bucket5_1
+                serialization.ddl struct bucket5_1 { string key, string value}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1269926334
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucket5_1
+          tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-18-54_583_5210238441426809761/10001
+
+
+PREHOOK: query: insert overwrite table bucket5_1
+select * from src cluster by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@bucket5_1
+POSTHOOK: query: insert overwrite table bucket5_1
+select * from src cluster by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@bucket5_1
+PREHOOK: query: select * from bucket5_1 order by key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucket5_1
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-01_821_2985042884082895223/10000
+POSTHOOK: query: select * from bucket5_1 order by key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucket5_1
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-01_821_2985042884082895223/10000
+0	val_0
+0	val_0
+0	val_0
+10	val_10
+100	val_100
+100	val_100
+103	val_103
+103	val_103
+104	val_104
+104	val_104
+105	val_105
+11	val_11
+111	val_111
+113	val_113
+113	val_113
+114	val_114
+116	val_116
+118	val_118
+118	val_118
+119	val_119
+119	val_119
+119	val_119
+12	val_12
+12	val_12
+120	val_120
+120	val_120
+125	val_125
+125	val_125
+126	val_126
+128	val_128
+128	val_128
+128	val_128
+129	val_129
+129	val_129
+131	val_131
+133	val_133
+134	val_134
+134	val_134
+136	val_136
+137	val_137
+137	val_137
+138	val_138
+138	val_138
+138	val_138
+138	val_138
+143	val_143
+145	val_145
+146	val_146
+146	val_146
+149	val_149
+149	val_149
+15	val_15
+15	val_15
+150	val_150
+152	val_152
+152	val_152
+153	val_153
+155	val_155
+156	val_156
+157	val_157
+158	val_158
+160	val_160
+162	val_162
+163	val_163
+164	val_164
+164	val_164
+165	val_165
+165	val_165
+166	val_166
+167	val_167
+167	val_167
+167	val_167
+168	val_168
+169	val_169
+169	val_169
+169	val_169
+169	val_169
+17	val_17
+170	val_170
+172	val_172
+172	val_172
+174	val_174
+174	val_174
+175	val_175
+175	val_175
+176	val_176
+176	val_176
+177	val_177
+178	val_178
+179	val_179
+179	val_179
+18	val_18
+18	val_18
+180	val_180
+181	val_181
+183	val_183
+186	val_186
+187	val_187
+187	val_187
+187	val_187
+189	val_189
+19	val_19
+190	val_190
+191	val_191
+191	val_191
+192	val_192
+193	val_193
+193	val_193
+193	val_193
+194	val_194
+195	val_195
+195	val_195
+196	val_196
+197	val_197
+197	val_197
+199	val_199
+199	val_199
+199	val_199
+2	val_2
+20	val_20
+200	val_200
+200	val_200
+201	val_201
+202	val_202
+203	val_203
+203	val_203
+205	val_205
+205	val_205
+207	val_207
+207	val_207
+208	val_208
+208	val_208
+208	val_208
+209	val_209
+209	val_209
+213	val_213
+213	val_213
+214	val_214
+216	val_216
+216	val_216
+217	val_217
+217	val_217
+218	val_218
+219	val_219
+219	val_219
+221	val_221
+221	val_221
+222	val_222
+223	val_223
+223	val_223
+224	val_224
+224	val_224
+226	val_226
+228	val_228
+229	val_229
+229	val_229
+230	val_230
+230	val_230
+230	val_230
+230	val_230
+230	val_230
+233	val_233
+233	val_233
+235	val_235
+237	val_237
+237	val_237
+238	val_238
+238	val_238
+239	val_239
+239	val_239
+24	val_24
+24	val_24
+241	val_241
+242	val_242
+242	val_242
+244	val_244
+247	val_247
+248	val_248
+249	val_249
+252	val_252
+255	val_255
+255	val_255
+256	val_256
+256	val_256
+257	val_257
+258	val_258
+26	val_26
+26	val_26
+260	val_260
+262	val_262
+263	val_263
+265	val_265
+265	val_265
+266	val_266
+27	val_27
+272	val_272
+272	val_272
+273	val_273
+273	val_273
+273	val_273
+274	val_274
+275	val_275
+277	val_277
+277	val_277
+277	val_277
+277	val_277
+278	val_278
+278	val_278
+28	val_28
+280	val_280
+280	val_280
+281	val_281
+281	val_281
+282	val_282
+282	val_282
+283	val_283
+284	val_284
+285	val_285
+286	val_286
+287	val_287
+288	val_288
+288	val_288
+289	val_289
+291	val_291
+292	val_292
+296	val_296
+298	val_298
+298	val_298
+298	val_298
+30	val_30
+302	val_302
+305	val_305
+306	val_306
+307	val_307
+307	val_307
+308	val_308
+309	val_309
+309	val_309
+310	val_310
+311	val_311
+311	val_311
+311	val_311
+315	val_315
+316	val_316
+316	val_316
+316	val_316
+317	val_317
+317	val_317
+318	val_318
+318	val_318
+318	val_318
+321	val_321
+321	val_321
+322	val_322
+322	val_322
+323	val_323
+325	val_325
+325	val_325
+327	val_327
+327	val_327
+327	val_327
+33	val_33
+331	val_331
+331	val_331
+332	val_332
+333	val_333
+333	val_333
+335	val_335
+336	val_336
+338	val_338
+339	val_339
+34	val_34
+341	val_341
+342	val_342
+342	val_342
+344	val_344
+344	val_344
+345	val_345
+348	val_348
+348	val_348
+348	val_348
+348	val_348
+348	val_348
+35	val_35
+35	val_35
+35	val_35
+351	val_351
+353	val_353
+353	val_353
+356	val_356
+360	val_360
+362	val_362
+364	val_364
+365	val_365
+366	val_366
+367	val_367
+367	val_367
+368	val_368
+369	val_369
+369	val_369
+369	val_369
+37	val_37
+37	val_37
+373	val_373
+374	val_374
+375	val_375
+377	val_377
+378	val_378
+379	val_379
+382	val_382
+382	val_382
+384	val_384
+384	val_384
+384	val_384
+386	val_386
+389	val_389
+392	val_392
+393	val_393
+394	val_394
+395	val_395
+395	val_395
+396	val_396
+396	val_396
+396	val_396
+397	val_397
+397	val_397
+399	val_399
+399	val_399
+4	val_4
+400	val_400
+401	val_401
+401	val_401
+401	val_401
+401	val_401
+401	val_401
+402	val_402
+403	val_403
+403	val_403
+403	val_403
+404	val_404
+404	val_404
+406	val_406
+406	val_406
+406	val_406
+406	val_406
+407	val_407
+409	val_409
+409	val_409
+409	val_409
+41	val_41
+411	val_411
+413	val_413
+413	val_413
+414	val_414
+414	val_414
+417	val_417
+417	val_417
+417	val_417
+418	val_418
+419	val_419
+42	val_42
+42	val_42
+421	val_421
+424	val_424
+424	val_424
+427	val_427
+429	val_429
+429	val_429
+43	val_43
+430	val_430
+430	val_430
+430	val_430
+431	val_431
+431	val_431
+431	val_431
+432	val_432
+435	val_435
+436	val_436
+437	val_437
+438	val_438
+438	val_438
+438	val_438
+439	val_439
+439	val_439
+44	val_44
+443	val_443
+444	val_444
+446	val_446
+448	val_448
+449	val_449
+452	val_452
+453	val_453
+454	val_454
+454	val_454
+454	val_454
+455	val_455
+457	val_457
+458	val_458
+458	val_458
+459	val_459
+459	val_459
+460	val_460
+462	val_462
+462	val_462
+463	val_463
+463	val_463
+466	val_466
+466	val_466
+466	val_466
+467	val_467
+468	val_468
+468	val_468
+468	val_468
+468	val_468
+469	val_469
+469	val_469
+469	val_469
+469	val_469
+469	val_469
+47	val_47
+470	val_470
+472	val_472
+475	val_475
+477	val_477
+478	val_478
+478	val_478
+479	val_479
+480	val_480
+480	val_480
+480	val_480
+481	val_481
+482	val_482
+483	val_483
+484	val_484
+485	val_485
+487	val_487
+489	val_489
+489	val_489
+489	val_489
+489	val_489
+490	val_490
+491	val_491
+492	val_492
+492	val_492
+493	val_493
+494	val_494
+495	val_495
+496	val_496
+497	val_497
+498	val_498
+498	val_498
+498	val_498
+5	val_5
+5	val_5
+5	val_5
+51	val_51
+51	val_51
+53	val_53
+54	val_54
+57	val_57
+58	val_58
+58	val_58
+64	val_64
+65	val_65
+66	val_66
+67	val_67
+67	val_67
+69	val_69
+70	val_70
+70	val_70
+70	val_70
+72	val_72
+72	val_72
+74	val_74
+76	val_76
+76	val_76
+77	val_77
+78	val_78
+8	val_8
+80	val_80
+82	val_82
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+85	val_85
+86	val_86
+87	val_87
+9	val_9
+90	val_90
+90	val_90
+90	val_90
+92	val_92
+95	val_95
+95	val_95
+96	val_96
+97	val_97
+97	val_97
+98	val_98
+98	val_98
+PREHOOK: query: drop table complex_tbl_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@complex_tbl_1
+PREHOOK: query: drop table complex_tbl_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table complex_tbl_2(aet string, aes string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table complex_tbl_2(aet string, aes string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@complex_tbl_2
+PREHOOK: query: explain extended
+insert overwrite table complex_tbl_1 partition (ds='2010-03-29')
+select s2.* from
+(
+  select TRANSFORM (aid,bid,t,ctime,etime,l,et)
+  USING 'cat'
+  AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+  from
+  (
+    select transform(aet,aes)
+    using 'cat'
+    as (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+    from complex_tbl_2 where ds ='2010-03-29' cluster by bid
+)s
+)s2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+insert overwrite table complex_tbl_1 partition (ds='2010-03-29')
+select s2.* from
+(
+  select TRANSFORM (aid,bid,t,ctime,etime,l,et)
+  USING 'cat'
+  AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+  from
+  (
+    select transform(aet,aes)
+    using 'cat'
+    as (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+    from complex_tbl_2 where ds ='2010-03-29' cluster by bid
+)s
+)s2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF complex_tbl_2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aet) (TOK_TABLE_OR_COL aes)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2010-03-29')) (TOK_CLUSTERBY (TOK_TABLE_OR_COL bid)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aid) (TOK_TABLE_OR_COL bid) (TOK_TABLE_OR_COL t) (TOK_TABLE_OR_COL ctime) (TOK_TABLE_OR_COL etime) (TOK_TABLE_OR_COL l) (TOK_TABLE_OR_COL et)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB complex_tbl_1 (TOK_PARTSPEC (TOK_PARTVAL ds '2010-03-29')))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF s2)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        s2:s:complex_tbl_2
+          TableScan
+            alias: complex_tbl_2
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (ds = '2010-03-29')
+                  type: boolean
+              Filter Operator
+                isSamplingPred: false
+                predicate:
+                    expr: (ds = '2010-03-29')
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: aet
+                        type: string
+                        expr: aes
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Transform Operator
+                    command: cat
+                    output info:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                          columns.types string,string,int,string,bigint,string,string
+                          field.delim 9
+                          serialization.format 9
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col1
+                            type: string
+                      sort order: +
+                      Map-reduce partition columns:
+                            expr: _col1
+                            type: string
+                      tag: -1
+                      value expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: int
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: bigint
+                            expr: _col5
+                            type: string
+                            expr: _col6
+                            type: string
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: int
+                  expr: _col3
+                  type: string
+                  expr: _col4
+                  type: bigint
+                  expr: _col5
+                  type: string
+                  expr: _col6
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Transform Operator
+              command: cat
+              output info:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                    columns.types string,string,int,string,bigint,string,string
+                    field.delim 9
+                    serialization.format 9
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                      expr: _col2
+                      type: int
+                      expr: _col3
+                      type: string
+                      expr: _col4
+                      type: bigint
+                      expr: _col5
+                      type: string
+                      expr: _col6
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-07_812_2407101678291620366/10000
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        bucket_count -1
+                        columns aid,bid,t,ctime,etime,l,et
+                        columns.types string:string:int:string:bigint:string:string
+                        file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                        file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/complex_tbl_1
+                        name complex_tbl_1
+                        partition_columns ds
+                        serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et}
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        transient_lastDdlTime 1269926347
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: complex_tbl_1
+                  TotalFiles: 1
+                  MultiFileSpray: false
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2010-03-29
+          replace: true
+          source: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-07_812_2407101678291620366/10000
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns aid,bid,t,ctime,etime,l,et
+                columns.types string:string:int:string:bigint:string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/complex_tbl_1
+                name complex_tbl_1
+                partition_columns ds
+                serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1269926347
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: complex_tbl_1
+          tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-07_812_2407101678291620366/10001
+
+
+PREHOOK: query: drop table complex_tbl_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@complex_tbl_2
+PREHOOK: query: drop table complex_tbl_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@complex_tbl_1
+PREHOOK: query: drop table bucket5_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket5_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@bucket5_1