Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java
===================================================================
--- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(revision 927179)
+++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java	(working copy)
@@ -219,6 +219,8 @@
     HIVEENFORCEBUCKETING("hive.enforce.bucketing", false),
     HIVEENFORCESORTING("hive.enforce.sorting", false),
     HIVEPARTITIONER("hive.mapred.partitioner", "org.apache.hadoop.hive.ql.io.DefaultHivePartitioner"),
+
+    HIVESCRIPTOPERATORTRUST("hive.exec.script.trust", false),
 
     // Optimizer
     HIVEOPTCP("hive.optimize.cp", true), // column pruner
@@ -226,6 +228,7 @@
     HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by
     HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join
     HIVEOPTSORTMERGEBUCKETMAPJOIN("hive.optimize.bucketmapjoin.sortedmerge", false), // try to use sorted merge bucket map join
+    HIVEOPTREDUCEDEDUPLICATION("hive.optimize.reducededuplication", true),
     ;
 
     public final String varname;
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java	(revision 927179)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java	(working copy)
@@ -64,6 +64,9 @@
     }
     transformations.add(new UnionProcessor());
     transformations.add(new JoinReorder());
+    if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTREDUCEDEDUPLICATION)) {
+      transformations.add(new ReduceSinkDeDuplication());
+    }
   }
 
   /**
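Reviewer note: both flags are read at plan time, so a session can toggle the new
behavior without a restart. The new reduce_deduplicate.q test below relies on the
default hive.optimize.reducededuplication=true and explicitly enables script trust
(which lets the optimizer look through TRANSFORM operators):

    set hive.optimize.reducededuplication=true;
    set hive.exec.script.trust=true;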
Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java	(revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/ReduceSinkDeDuplication.java	(revision 0)
@@ -0,0 +1,433 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.ColumnInfo;
+import org.apache.hadoop.hive.ql.exec.CommonJoinOperator;
+import org.apache.hadoop.hive.ql.exec.ExtractOperator;
+import org.apache.hadoop.hive.ql.exec.GroupByOperator;
+import org.apache.hadoop.hive.ql.exec.LateralViewJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.ReduceSinkOperator;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.ScriptOperator;
+import org.apache.hadoop.hive.ql.exec.SelectOperator;
+import org.apache.hadoop.hive.ql.exec.UDTFOperator;
+import org.apache.hadoop.hive.ql.exec.UnionOperator;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.OpParseContext;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.RowResolver;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc;
+import org.apache.hadoop.hive.ql.plan.ExprNodeDesc;
+import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc;
+import org.apache.hadoop.hive.ql.plan.SelectDesc;
+
+/**
+ * If two reduce sink operators share the same partition/sort columns, we
+ * should merge them. This should happen after map join optimization because
+ * map join optimization will remove reduce sink operators.
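+ *
+ * For example, with hive.enforce.bucketing enabled, a query such as
+ *
+ *   insert overwrite table bucket5_1 select * from src cluster by key;
+ *
+ * plans one reduce sink for the CLUSTER BY and a second one for the enforced
+ * bucketing; both partition and sort on key, so the second map-reduce job can
+ * be removed (see reduce_deduplicate.q below).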
+ */
+public class ReduceSinkDeDuplication implements Transform {
+
+  protected ParseContext pGraphContext;
+
+  @Override
+  public ParseContext transform(ParseContext pctx) throws SemanticException {
+    pGraphContext = pctx;
+
+    // generate pruned column list for all relevant operators
+    ReduceSinkDeduplicateProcCtx cppCtx = new ReduceSinkDeduplicateProcCtx(pGraphContext);
+
+    Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>();
+    opRules.put(new RuleRegExp("R1", "RS%.*RS%"), ReduceSinkDeduplicateProcFactory
+        .getReducerReducerProc());
+    opRules.put(new RuleRegExp("R2", "GBY%.*RS%"), ReduceSinkDeduplicateProcFactory
+        .getRejectedReduceSinkProc());
+
+    // The dispatcher fires the processor corresponding to the closest matching
+    // rule and passes the context along
+    Dispatcher disp = new DefaultRuleDispatcher(ReduceSinkDeduplicateProcFactory
+        .getDefaultProc(), opRules, cppCtx);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // Create a list of top op nodes
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pGraphContext.getTopOps().values());
+    ogw.startWalking(topNodes, null);
+    return pGraphContext;
+  }
+
+  class ReduceSinkDeduplicateProcCtx implements NodeProcessorCtx {
+    ParseContext pctx;
+    List<ReduceSinkOperator> rejectedRSList;
+
+    public ReduceSinkDeduplicateProcCtx(ParseContext pctx) {
+      rejectedRSList = new ArrayList<ReduceSinkOperator>();
+      this.pctx = pctx;
+    }
+
+    public boolean contains(ReduceSinkOperator rsOp) {
+      return rejectedRSList.contains(rsOp);
+    }
+
+    public void addRejectedReduceSinkOperator(ReduceSinkOperator rsOp) {
+      if (!rejectedRSList.contains(rsOp)) {
+        rejectedRSList.add(rsOp);
+      }
+    }
+
+    public ParseContext getPctx() {
+      return pctx;
+    }
+
+    public void setPctx(ParseContext pctx) {
+      this.pctx = pctx;
+    }
+  }
+
+  static class ReduceSinkDeduplicateProcFactory {
+
+    public static NodeProcessor getReducerReducerProc() {
+      return new ReducerReducerProc();
+    }
+
+    public static NodeProcessor getDefaultProc() {
+      return new DefaultProc();
+    }
+
+    public static NodeProcessor getRejectedReduceSinkProc() {
+      return new RejectedReduceSinkProc();
+    }
+
+    /*
+     * do nothing.
+     */
+    static class DefaultProc implements NodeProcessor {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        return null;
+      }
+    }
+
+    /*
+     * add the reduce sink operator to the rejected list (it should not be
+     * merged with its parent reduce sink operator).
+     */
+    static class RejectedReduceSinkProc implements NodeProcessor {
+
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
+        ctx.addRejectedReduceSinkOperator((ReduceSinkOperator) nd);
+        return null;
+      }
+    }
+
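+    /*
+     * merge the child reduce sink into its single parent reduce sink when the
+     * partition and key columns of both back-track to the same input columns;
+     * the now-redundant child reduce sink is replaced by a select operator.
+     */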
+    static class ReducerReducerProc implements NodeProcessor {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        ReduceSinkDeduplicateProcCtx ctx = (ReduceSinkDeduplicateProcCtx) procCtx;
+        ReduceSinkOperator childReduceSink = (ReduceSinkOperator) nd;
+
+        if (ctx.contains(childReduceSink)) {
+          return null;
+        }
+
+        ParseContext pGraphContext = ctx.getPctx();
+        HashMap<String, String> childColumnMapping = getPartitionAndKeyColumnMapping(childReduceSink);
+        ReduceSinkOperator parentRS = findSingleParentReduceSink(childReduceSink, pGraphContext);
+        if (parentRS == null) {
+          return null;
+        }
+        HashMap<String, String> parentColumnMapping = getPartitionAndKeyColumnMapping(parentRS);
+        Operator<? extends Serializable> stopBacktrackFlagOp = null;
+        if (parentRS.getParentOperators() == null
+            || parentRS.getParentOperators().size() == 0) {
+          stopBacktrackFlagOp = parentRS;
+        } else if (parentRS.getParentOperators().size() != 1) {
+          return null;
+        } else {
+          stopBacktrackFlagOp = parentRS.getParentOperators().get(0);
+        }
+
+        boolean succeed = backTrackColumnNames(childColumnMapping, childReduceSink, stopBacktrackFlagOp, pGraphContext);
+        if (!succeed) {
+          return null;
+        }
+        succeed = backTrackColumnNames(parentColumnMapping, parentRS, stopBacktrackFlagOp, pGraphContext);
+        if (!succeed) {
+          return null;
+        }
+
+        boolean same = compareReduceSink(childReduceSink, parentRS, childColumnMapping, parentColumnMapping);
+        if (!same) {
+          return null;
+        }
+        replaceReduceSinkWithSelectOperator(childReduceSink, pGraphContext);
+        return null;
+      }
+
+      private void replaceReduceSinkWithSelectOperator(
+          ReduceSinkOperator childReduceSink, ParseContext pGraphContext) throws SemanticException {
+        List<Operator<? extends Serializable>> parentOp = childReduceSink.getParentOperators();
+        List<Operator<? extends Serializable>> childOp = childReduceSink.getChildOperators();
+
+        Operator<? extends Serializable> oldParent = childReduceSink;
+
+        if (childOp != null && childOp.size() == 1
+            && ((childOp.get(0)) instanceof ExtractOperator)) {
+          oldParent = childOp.get(0);
+          childOp = childOp.get(0).getChildOperators();
+        }
+
+        Operator<? extends Serializable> input = parentOp.get(0);
+        input.getChildOperators().clear();
+
+        RowResolver inputRR = pGraphContext.getOpParseCtx().get(input).getRR();
+
+        ArrayList<ExprNodeDesc> exprs = new ArrayList<ExprNodeDesc>();
+        ArrayList<String> outputs = new ArrayList<String>();
+        List<String> outputCols = childReduceSink.getConf().getOutputValueColumnNames();
+        RowResolver outputRS = new RowResolver();
+
+        Map<String, ExprNodeDesc> colExprMap = new HashMap<String, ExprNodeDesc>();
+
+        for (int i = 0; i < outputCols.size(); i++) {
+          String internalName = outputCols.get(i);
+          String[] nm = inputRR.reverseLookup(internalName);
+          ColumnInfo valueInfo = inputRR.get(nm[0], nm[1]);
+          ExprNodeDesc colDesc = childReduceSink.getConf().getValueCols().get(i);
+          exprs.add(colDesc);
+          outputs.add(internalName);
+          outputRS.put(nm[0], nm[1], new ColumnInfo(internalName, valueInfo
+              .getType(), nm[0], valueInfo.getIsPartitionCol()));
+          colExprMap.put(internalName, colDesc);
+        }
+
+        SelectDesc select = new SelectDesc(exprs, outputs, false);
+
+        SelectOperator sel = (SelectOperator) putOpInsertMap(
+            OperatorFactory.getAndMakeChild(select, new RowSchema(inputRR
+                .getColumnInfos()), input), inputRR, pGraphContext);
+
+        sel.setColumnExprMap(colExprMap);
+
+        // Insert the select operator in between.
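+        // The new select operator only forwards the reduce sink's value
+        // columns, so downstream operators keep seeing the same schema.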
+        sel.setChildOperators(childOp);
+        for (Operator<? extends Serializable> ch : childOp) {
+          ch.replaceParent(oldParent, sel);
+        }
+      }
+
+      private Operator<? extends Serializable> putOpInsertMap(
+          Operator<? extends Serializable> op, RowResolver rr, ParseContext pGraphContext) {
+        OpParseContext ctx = new OpParseContext(rr);
+        pGraphContext.getOpParseCtx().put(op, ctx);
+        return op;
+      }
+
+      private boolean compareReduceSink(ReduceSinkOperator childReduceSink,
+          ReduceSinkOperator parentRS,
+          HashMap<String, String> childColumnMapping,
+          HashMap<String, String> parentColumnMapping) {
+        ArrayList<ExprNodeDesc> childPartitionCols = childReduceSink.getConf().getPartitionCols();
+        ArrayList<ExprNodeDesc> parentPartitionCols = parentRS.getConf().getPartitionCols();
+
+        boolean ret = compareExprNodes(childColumnMapping, parentColumnMapping,
+            childPartitionCols, parentPartitionCols);
+        if (!ret) {
+          return false;
+        }
+
+        ArrayList<ExprNodeDesc> childReduceKeyCols = childReduceSink.getConf().getKeyCols();
+        ArrayList<ExprNodeDesc> parentReduceKeyCols = parentRS.getConf().getKeyCols();
+        ret = compareExprNodes(childColumnMapping, parentColumnMapping,
+            childReduceKeyCols, parentReduceKeyCols);
+        if (!ret) {
+          return false;
+        }
+
+        return true;
+      }
+
+      private boolean compareExprNodes(HashMap<String, String> childColumnMapping,
+          HashMap<String, String> parentColumnMapping,
+          ArrayList<ExprNodeDesc> childColExprs,
+          ArrayList<ExprNodeDesc> parentColExprs) {
+
+        boolean childEmpty = childColExprs == null || childColExprs.size() == 0;
+        boolean parentEmpty = parentColExprs == null || parentColExprs.size() == 0;
+
+        if (childEmpty) { // both empty
+          return true;
+        }
+
+        // child not empty here
+        if (parentEmpty) { // child not empty, but parent empty
+          return false;
+        }
+
+        if (childColExprs.size() != parentColExprs.size()) {
+          return false;
+        }
+        int i = 0;
+        while (i < childColExprs.size()) {
+          ExprNodeDesc childExpr = childColExprs.get(i);
+          ExprNodeDesc parentExpr = parentColExprs.get(i);
+
+          if ((childExpr instanceof ExprNodeColumnDesc)
+              && (parentExpr instanceof ExprNodeColumnDesc)) {
+            String childCol = childColumnMapping
+                .get(((ExprNodeColumnDesc) childExpr).getColumn());
+            String parentCol = parentColumnMapping
+                .get(((ExprNodeColumnDesc) parentExpr).getColumn());
+
+            if (childCol == null || !childCol.equals(parentCol)) {
+              return false;
+            }
+          } else {
+            return false;
+          }
+          i++;
+        }
+        return true;
+      }
+
+      /*
+       * back track column names to find their corresponding original column
+       * names. Only allow simple operators like 'select column' or filter.
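+       * If any tracked column passes through an expression that is not a
+       * plain column reference, back tracking fails and the merge is
+       * rejected.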
+       */
+      private boolean backTrackColumnNames(
+          HashMap<String, String> columnMapping,
+          ReduceSinkOperator reduceSink,
+          Operator<? extends Serializable> stopBacktrackFlagOp, ParseContext pGraphContext) {
+        Operator<? extends Serializable> startOperator = reduceSink;
+        while (startOperator != null && startOperator != stopBacktrackFlagOp) {
+          startOperator = startOperator.getParentOperators().get(0);
+          Map<String, ExprNodeDesc> colExprMap = startOperator.getColumnExprMap();
+          if (colExprMap == null || colExprMap.size() == 0) {
+            continue;
+          }
+          Iterator<String> keyIter = columnMapping.keySet().iterator();
+          while (keyIter.hasNext()) {
+            String key = keyIter.next();
+            String oldCol = columnMapping.get(key);
+            ExprNodeDesc exprNode = colExprMap.get(oldCol);
+            if (exprNode instanceof ExprNodeColumnDesc) {
+              String col = ((ExprNodeColumnDesc) exprNode).getColumn();
+              columnMapping.put(key, col);
+            } else {
+              return false;
+            }
+          }
+        }
+
+        return true;
+      }
+
+      private HashMap<String, String> getPartitionAndKeyColumnMapping(ReduceSinkOperator reduceSink) {
+        HashMap<String, String> columnMapping = new HashMap<String, String>();
+        ReduceSinkDesc reduceSinkDesc = reduceSink.getConf();
+        ArrayList<ExprNodeDesc> partitionCols = reduceSinkDesc.getPartitionCols();
+        ArrayList<ExprNodeDesc> reduceKeyCols = reduceSinkDesc.getKeyCols();
+        if (partitionCols != null) {
+          for (ExprNodeDesc desc : partitionCols) {
+            List<String> cols = desc.getCols();
+            for (String col : cols) {
+              columnMapping.put(col, col);
+            }
+          }
+        }
+        if (reduceKeyCols != null) {
+          for (ExprNodeDesc desc : reduceKeyCols) {
+            List<String> cols = desc.getCols();
+            for (String col : cols) {
+              columnMapping.put(col, col);
+            }
+          }
+        }
+        return columnMapping;
+      }
+
+      private ReduceSinkOperator findSingleParentReduceSink(ReduceSinkOperator childReduceSink, ParseContext pGraphContext) {
+        Operator<? extends Serializable> start = childReduceSink;
+        while (start != null) {
+          if (start.getParentOperators() == null
+              || start.getParentOperators().size() != 1) {
+            // this potentially is a join operator
+            return null;
+          }
+
+          if ((start instanceof CommonJoinOperator)
+              || (start instanceof GroupByOperator)
+              || (start instanceof UnionOperator)
+              || (start instanceof UDTFOperator)
+              || (start instanceof LateralViewJoinOperator)) {
+            return null;
+          }
+
+          if ((start instanceof ScriptOperator)
+              && !HiveConf.getBoolVar(pGraphContext.getConf(),
+                  HiveConf.ConfVars.HIVESCRIPTOPERATORTRUST)) {
+            return null;
+          }
+
+          start = start.getParentOperators().get(0);
+          if (start instanceof ReduceSinkOperator) {
+            return (ReduceSinkOperator) start;
+          }
+        }
+        return null;
+      }
+    }
+  }
+}
Index: ql/src/test/queries/clientpositive/reduce_deduplicate.q
===================================================================
--- ql/src/test/queries/clientpositive/reduce_deduplicate.q	(revision 0)
+++ ql/src/test/queries/clientpositive/reduce_deduplicate.q	(revision 0)
@@ -0,0 +1,42 @@
+set hive.enforce.bucketing = true;
+set hive.exec.reducers.max = 1;
+set hive.exec.script.trust = true;
+
+drop table bucket5_1;
+
+CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS;
+explain extended
+insert overwrite table bucket5_1
+select * from src cluster by key;
+
+insert overwrite table bucket5_1
+select * from src cluster by key;
+
+select * from bucket5_1 order by key;
+
+drop table complex_tbl_1;
+create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string);
+
+drop table complex_tbl_2;
+create table complex_tbl_2(aet string, aes string) partitioned by (ds string);
+
+explain extended
+insert overwrite table complex_tbl_1 partition (ds='2010-03-29')
+select s2.* from
+(
+  select TRANSFORM (aid,bid,t,ctime,etime,l,et)
+  USING 'cat'
+  AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+  from
+  (
+  select transform(aet,aes)
+  using 'cat'
+  as (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+  from complex_tbl_2 where ds ='2010-03-29' cluster by bid
+)s
+)s2;
+
+drop table complex_tbl_2;
+drop table complex_tbl_1;
+drop table bucket5_1;
Index: ql/src/test/results/clientpositive/ctas.q.out
===================================================================
--- ql/src/test/results/clientpositive/ctas.q.out	(revision 928940)
+++ ql/src/test/results/clientpositive/ctas.q.out	(working copy)
@@ -26,11 +26,11 @@
 PREHOOK: query: select * from nzhang_Tmp
 PREHOOK: type: QUERY
 PREHOOK: Input: default@nzhang_tmp
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-36_945_6347474365941059529/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-01_455_6108617375963741196/10000
 POSTHOOK: query: select * from nzhang_Tmp
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nzhang_tmp
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-36_945_6347474365941059529/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-01_455_6108617375963741196/10000
 PREHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10
 PREHOOK: type: CREATETABLE
 POSTHOOK: query: explain create table nzhang_CTAS1 as select key k, value from src sort by k, value limit 10
@@ -40,9 +40,8 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -74,47 +73,28 @@
       Reduce Operator Tree:
         Extract
           Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-36_995_4233953126182984687/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-                  expr: _col1
-                  type: string
-            sort order: ++
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                   expr: _col0
                   type: string
                   expr: _col1
                   type: string
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              outputColumnNames: _col0, _col1
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Move Operator
       files:
           hdfs directory: true
-          destination: file:///data/users/njain/hive1/hive1/build/ql/test/data/warehouse/nzhang_ctas1
+          destination: file:///Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/nzhang_ctas1
 
-  Stage: Stage-3
+  Stage: Stage-2
       Create Table Operator:
         Create Table
           columns: k string, value string
@@ -136,11 +116,11 @@
 PREHOOK: query: select * from nzhang_CTAS1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@nzhang_ctas1
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-47_171_5862961218268088886/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-10_321_1799194093245544872/10000
 POSTHOOK: query: select * from nzhang_CTAS1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nzhang_ctas1
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-47_171_5862961218268088886/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-10_321_1799194093245544872/10000
 0	val_0
 0	val_0
 0	val_0
@@ -160,9 +140,8 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -194,47 +173,28 @@
      Reduce Operator Tree:
        Extract
          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-47_227_8082884342040328027/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-                  expr: _col1
-                  type: string
-            sort order: ++
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                   expr: _col0
                   type: string
                   expr: _col1
                   type: string
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              outputColumnNames: _col0, _col1
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Move Operator
       files:
           hdfs directory: true
-          destination: file:///data/users/njain/hive1/hive1/build/ql/test/data/warehouse/nzhang_ctas2
+          destination: file:///Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/nzhang_ctas2
 
-  Stage: Stage-3
+  Stage: Stage-2
      Create Table Operator:
        Create Table
          columns: key string, value string
@@ -256,11 +216,11 @@
 PREHOOK: query: select * from nzhang_ctas2
 PREHOOK: type: QUERY
 PREHOOK: Input: default@nzhang_ctas2
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-55_547_4128296550453730519/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-15_866_5052900743735917517/10000
 POSTHOOK: query: select * from nzhang_ctas2
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nzhang_ctas2
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-55_547_4128296550453730519/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-15_866_5052900743735917517/10000
 0	val_0
 0	val_0
 0	val_0
@@ -324,7 +284,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-34-55_605_2682586381323159353/10002 
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-15_933_3367193336316503989/10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -352,7 +312,7 @@
     Move Operator
       files:
           hdfs directory: true
-          destination: file:///data/users/njain/hive1/hive1/build/ql/test/data/warehouse/nzhang_ctas3
+          destination: file:///Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/nzhang_ctas3
 
   Stage: Stage-3
       Create Table Operator:
@@ -377,11 +337,11 @@
 PREHOOK: query: select * from nzhang_ctas3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@nzhang_ctas3
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-03_686_6599056952238873916/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-27_468_7994201377832887635/10000
 POSTHOOK: query: select * from nzhang_ctas3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nzhang_ctas3
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-03_686_6599056952238873916/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-27_468_7994201377832887635/10000
 0.0	val_0_con
 0.0	val_0_con
 0.0	val_0_con
@@ -410,11 +370,11 @@
 PREHOOK: query: select * from nzhang_ctas3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@nzhang_ctas3
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-03_838_6249073042637282592/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-27_689_8736198634332198038/10000
 POSTHOOK: query: select * from nzhang_ctas3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nzhang_ctas3
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-03_838_6249073042637282592/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-27_689_8736198634332198038/10000
 0.0	val_0_con
 0.0	val_0_con
 0.0	val_0_con
@@ -434,9 +394,8 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -468,47 +427,28 @@
      Reduce Operator Tree:
        Extract
          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-03_894_3834293106109095199/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-                  expr: _col1
-                  type: string
-            sort order: ++
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                   expr: _col0
                   type: string
                   expr: _col1
                   type: string
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              outputColumnNames: _col0, _col1
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Move Operator
      files:
          hdfs directory: true
-          destination: file:///data/users/njain/hive1/hive1/build/ql/test/data/warehouse/nzhang_ctas4
+          destination: file:///Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/nzhang_ctas4
 
-  Stage: Stage-3
+  Stage: Stage-2
      Create Table Operator:
        Create Table
          columns: key string, value string
@@ -531,11 +471,11 @@
 PREHOOK: query: select * from nzhang_ctas4
 PREHOOK: type: QUERY
 PREHOOK: Input: default@nzhang_ctas4
-PREHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_105_2973973102606118749/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-32_703_8088045747457725025/10000
 POSTHOOK: query: select * from nzhang_ctas4
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@nzhang_ctas4
-POSTHOOK: Output: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_105_2973973102606118749/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-32_703_8088045747457725025/10000
 0	val_0
 0	val_0
 0	val_0
@@ -555,9 +495,8 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
-  Stage-3 depends on stages: Stage-0
+  Stage-0 depends on stages: Stage-1
+  Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
   Stage: Stage-1
@@ -588,9 +527,9 @@
                     type: string
       Needs Tagging: false
       Path -> Alias:
-        file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src [src]
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src [src]
       Path -> Partition:
-        file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src 
+        file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src 
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -601,12 +540,12 @@
               columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src
+              location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1266449676
+              transient_lastDdlTime 1269925740
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -617,99 +556,52 @@
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location file:/data/users/njain/hive1/hive1/build/ql/test/data/warehouse/src
+              location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1266449676
+              transient_lastDdlTime 1269925740
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            name: src
          name: src
      Reduce Operator Tree:
        Extract
          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10002
-              NumFilesPerFileSink: 1
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                  properties:
-                    columns _col0,_col1
-                    columns.types string,string
-                    escape.delim \
-              TotalFiles: 1
-              MultiFileSpray: false
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-                  expr: _col1
-                  type: string
-            sort order: ++
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
-      Needs Tagging: false
-      Path -> Alias:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10002 [file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10002]
-      Path -> Partition:
-        file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10002 
-          Partition
-            base file name: 10002
-            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            properties:
-              columns _col0,_col1
-              columns.types string,string
-              escape.delim \
-
-            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            properties:
-              columns _col0,_col1
-              columns.types string,string
-              escape.delim \
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              directory: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10001
-              NumFilesPerFileSink: 1
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  properties:
-                    columns _col0,_col1
-                    columns.types string:string
-                    field.delim ,
-                    line.delim 
+              outputColumnNames: _col0, _col1
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-32_760_7976824957664260788/10001
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        columns _col0,_col1
+                        columns.types string:string
+                        field.delim ,
+                        line.delim 
 
-                    serialization.format ,
-              TotalFiles: 1
-              MultiFileSpray: false
+                        serialization.format ,
+                  TotalFiles: 1
+                  MultiFileSpray: false
 
   Stage: Stage-0
     Move Operator
      files:
          hdfs directory: true
-          source: file:/data/users/njain/hive1/hive1/build/ql/scratchdir/hive_2010-02-17_15-35-13_160_4580041524192799730/10001
-          destination: file:///data/users/njain/hive1/hive1/build/ql/test/data/warehouse/nzhang_ctas5
+          source: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-32_760_7976824957664260788/10001
+          destination: file:///Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/nzhang_ctas5
 
-  Stage: Stage-3
+  Stage: Stage-2
      Create Table Operator:
        Create Table
          columns: key string, value string
Index: ql/src/test/results/clientpositive/input14_limit.q.out
===================================================================
--- ql/src/test/results/clientpositive/input14_limit.q.out	(revision 928940)
+++ ql/src/test/results/clientpositive/input14_limit.q.out	(working copy)
@@ -26,8 +26,7 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -65,60 +64,40 @@
      Reduce Operator Tree:
        Extract
          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/nzhang/work/870/apache-hive/build/ql/tmp/275552824/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-            sort order: +
-            Map-reduce partition columns:
-                  expr: _col0
-                  type: string
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
-      Reduce Operator Tree:
-        Extract
-          Limit
-            Filter Operator
-              predicate:
-                  expr: (_col0 < 100)
-                  type: boolean
-              Select Operator
-                expressions:
-                      expr: _col0
-                      type: string
-                      expr: _col1
-                      type: string
-                outputColumnNames: _col0, _col1
-                Select Operator
-                  expressions:
-                        expr: UDFToInteger(_col0)
-                        type: int
-                        expr: _col1
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 1
-                    table:
-                        input format: org.apache.hadoop.mapred.TextInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                        name: dest1
+              outputColumnNames: _col0, _col1
+              Limit
+                Filter Operator
+                  predicate:
+                      expr: (_col0 < 100)
+                      type: boolean
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                    outputColumnNames: _col0, _col1
+                    Select Operator
+                      expressions:
+                            expr: UDFToInteger(_col0)
+                            type: int
+                            expr: _col1
+                            type: string
+                      outputColumnNames: _col0, _col1
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: dest1
 
   Stage: Stage-0
     Move Operator
@@ -154,11 +133,11 @@
 PREHOOK: query: SELECT dest1.* FROM dest1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest1
-PREHOOK: Output: file:/data/users/nzhang/work/870/apache-hive/build/ql/tmp/679446797/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-47_859_8270687092803388860/10000
 POSTHOOK: query: SELECT dest1.* FROM dest1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest1
-POSTHOOK: Output: file:/data/users/nzhang/work/870/apache-hive/build/ql/tmp/679446797/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-47_859_8270687092803388860/10000
 0	val_0
 0	val_0
 0	val_0
Index: ql/src/test/results/clientpositive/input3_limit.q.out
===================================================================
--- ql/src/test/results/clientpositive/input3_limit.q.out	(revision 928940)
+++ ql/src/test/results/clientpositive/input3_limit.q.out	(working copy)
@@ -37,8 +37,7 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-0 depends on stages: Stage-2
+  Stage-0 depends on stages: Stage-1
 
 STAGE PLANS:
   Stage: Stage-1
@@ -80,36 +79,22 @@
                  type: string
            outputColumnNames: _col0, _col1
            Limit
-              File Output Operator
-                compressed: false
-                GlobalTableId: 0
-                table:
-                    input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                    output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/njain/hive5/hive5/build/ql/tmp/1571190278/10002 
-          Reduce Output Operator
-            sort order: 
-            tag: -1
-            value expressions:
-                  expr: _col0
-                  type: string
-                  expr: _col1
-                  type: string
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 1
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  name: t2
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                outputColumnNames: _col0, _col1
+                Limit
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: t2
 
   Stage: Stage-0
     Move Operator
@@ -133,11 +118,11 @@
 PREHOOK: query: SELECT * FROM T2 SORT BY key, value
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/2048970411/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-56_877_5454401620106665047/10000
 POSTHOOK: query: SELECT * FROM T2 SORT BY key, value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/2048970411/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-09-56_877_5454401620106665047/10000
 0	val_0
 0	val_0
 0	val_0
Index: ql/src/test/results/clientpositive/input4_limit.q.out
===================================================================
--- ql/src/test/results/clientpositive/input4_limit.q.out	(revision 928940)
+++ ql/src/test/results/clientpositive/input4_limit.q.out	(working copy)
@@ -9,7 +9,6 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -40,37 +39,20 @@
      Reduce Operator Tree:
        Extract
          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/njain/hive5/hive5/build/ql/tmp/1763489725/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-            sort order: +
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              outputColumnNames: _col0, _col1
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -80,11 +62,11 @@
 PREHOOK: query: select * from src sort by key limit 10
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1576676537/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-06_024_9160954902114674860/10000
 POSTHOOK: query: select * from src sort by key limit 10
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/njain/hive5/hive5/build/ql/tmp/1576676537/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-06_024_9160954902114674860/10000
 0	val_0
 0	val_0
 0	val_0
Index: ql/src/test/results/clientpositive/lateral_view.q.out
===================================================================
--- ql/src/test/results/clientpositive/lateral_view.q.out	(revision 928940)
+++ ql/src/test/results/clientpositive/lateral_view.q.out	(working copy)
@@ -7,7 +7,6 @@
 
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 
 STAGE PLANS:
@@ -81,41 +80,22 @@
      Reduce Operator Tree:
        Extract
          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-
-  Stage: Stage-2
-    Map Reduce
-      Alias -> Map Operator Tree:
-        file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/119895400/10002 
-          Reduce Output Operator
-            key expressions:
-                  expr: _col0
-                  type: string
-                  expr: _col2
-                  type: int
-            sort order: ++
-            tag: -1
-            value expressions:
+            Select Operator
+              expressions:
                  expr: _col0
                  type: string
                  expr: _col1
                  type: string
                  expr: _col2
                  type: int
-      Reduce Operator Tree:
-        Extract
-          Limit
-            File Output Operator
-              compressed: false
-              GlobalTableId: 0
-              table:
-                  input format: org.apache.hadoop.mapred.TextInputFormat
-                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              outputColumnNames: _col0, _col1, _col2
+              Limit
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 0
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 
   Stage: Stage-0
     Fetch Operator
@@ -417,23 +397,23 @@
 SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/1455708722/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-15_150_8006325626771568459/10000
 POSTHOOK: query: -- Verify that * selects columns from both tables
 SELECT * FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol SORT BY key ASC, myCol ASC LIMIT 1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/1455708722/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-15_150_8006325626771568459/10000
 0	val_0	1
 PREHOOK: query: -- TABLE.* should be supported
 SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/1119696855/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-20_841_4969716061396163193/10000
 POSTHOOK: query: -- TABLE.* should be supported
 SELECT myTable.* FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/1119696855/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-20_841_4969716061396163193/10000
 1
 2
 3
@@ -441,12 +421,12 @@
 SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/391873070/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-25_322_3828704717015467664/10000
 POSTHOOK: query: -- Multiple lateral views should result in a Cartesian product
 SELECT myTable.myCol, myTable2.myCol2 FROM src LATERAL VIEW explode(array(1,2,3)) myTable AS myCol LATERAL VIEW explode(array('a', 'b', 'c')) myTable2 AS myCol2 LIMIT 9
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/391873070/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-25_322_3828704717015467664/10000
 1	a
 1	b
 1	c
@@ -460,12 +440,12 @@
 SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/1057041862/10000
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-30_077_5668267335301615994/10000
 POSTHOOK: query: -- Should be able to reference tables generated earlier
 SELECT myTable2.* FROM src LATERAL VIEW explode(array(array(1,2,3))) myTable AS myCol LATERAL VIEW explode(myTable.myCol) myTable2 AS myCol2 LIMIT 3
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/data/users/pyang/task/trunk/VENDOR.hive/trunk/build/ql/tmp/1057041862/10000
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-10-30_077_5668267335301615994/10000
 1
 2
 3
Index: ql/src/test/results/clientpositive/reduce_deduplicate.q.out
===================================================================
--- ql/src/test/results/clientpositive/reduce_deduplicate.q.out	(revision 0)
+++ ql/src/test/results/clientpositive/reduce_deduplicate.q.out	(revision 0)
@@ -0,0 +1,902 @@
+PREHOOK: query: drop table bucket5_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket5_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: CREATE TABLE bucket5_1(key string, value string) CLUSTERED BY (key) INTO 2 BUCKETS
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@bucket5_1
+PREHOOK: query: explain extended
+insert overwrite table bucket5_1
key +PREHOOK: type: QUERY +POSTHOOK: query: explain extended +insert overwrite table bucket5_1 +select * from src cluster by key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucket5_1)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_CLUSTERBY (TOK_TABLE_OR_COL key)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + src + TableScan + alias: src + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Needs Tagging: false + Path -> Alias: + file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src [src] + Path -> Partition: + file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src + Partition + base file name: src + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src + name src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1269926333 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count -1 + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/src + name src + serialization.ddl struct src { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1269926333 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: src + name: src + Reduce Operator Tree: + Extract + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-18-54_583_5210238441426809761/10000 + NumFilesPerFileSink: 2 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/bucket5_1 + name bucket5_1 + 
serialization.ddl struct bucket5_1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1269926334 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucket5_1 + TotalFiles: 2 + MultiFileSpray: true + + Stage: Stage-0 + Move Operator + tables: + replace: true + source: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-18-54_583_5210238441426809761/10000 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + properties: + bucket_count 2 + bucket_field_name key + columns key,value + columns.types string:string + file.inputformat org.apache.hadoop.mapred.TextInputFormat + file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/bucket5_1 + name bucket5_1 + serialization.ddl struct bucket5_1 { string key, string value} + serialization.format 1 + serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + transient_lastDdlTime 1269926334 + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucket5_1 + tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-18-54_583_5210238441426809761/10001 + + +PREHOOK: query: insert overwrite table bucket5_1 +select * from src cluster by key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@bucket5_1 +POSTHOOK: query: insert overwrite table bucket5_1 +select * from src cluster by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@bucket5_1 +PREHOOK: query: select * from bucket5_1 order by key +PREHOOK: type: QUERY +PREHOOK: Input: default@bucket5_1 +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-01_821_2985042884082895223/10000 +POSTHOOK: query: select * from bucket5_1 order by key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucket5_1 +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-01_821_2985042884082895223/10000 +0 val_0 +0 val_0 +0 val_0 +10 val_10 +100 val_100 +100 val_100 +103 val_103 +103 val_103 +104 val_104 +104 val_104 +105 val_105 +11 val_11 +111 val_111 +113 val_113 +113 val_113 +114 val_114 +116 val_116 +118 val_118 +118 val_118 +119 val_119 +119 val_119 +119 val_119 +12 val_12 +12 val_12 +120 val_120 +120 val_120 +125 val_125 +125 val_125 +126 val_126 +128 val_128 +128 val_128 +128 val_128 +129 val_129 +129 val_129 +131 val_131 +133 val_133 +134 val_134 +134 val_134 +136 val_136 +137 val_137 +137 val_137 +138 val_138 +138 val_138 +138 val_138 +138 val_138 +143 val_143 +145 val_145 +146 val_146 +146 val_146 +149 val_149 +149 val_149 +15 val_15 +15 val_15 +150 val_150 +152 val_152 +152 val_152 +153 val_153 +155 val_155 +156 val_156 +157 val_157 +158 val_158 +160 val_160 +162 val_162 +163 val_163 +164 val_164 +164 val_164 +165 val_165 +165 val_165 +166 val_166 +167 val_167 +167 val_167 +167 val_167 +168 val_168 +169 val_169 +169 val_169 +169 val_169 +169 val_169 +17 val_17 +170 val_170 +172 val_172 +172 val_172 +174 val_174 +174 val_174 +175 val_175 +175 val_175 +176 val_176 +176 val_176 +177 val_177 +178 val_178 +179 val_179 +179 val_179 +18 val_18 +18 val_18 +180 val_180 +181 val_181 +183 val_183 +186 val_186 +187 
val_187 +187 val_187 +187 val_187 +189 val_189 +19 val_19 +190 val_190 +191 val_191 +191 val_191 +192 val_192 +193 val_193 +193 val_193 +193 val_193 +194 val_194 +195 val_195 +195 val_195 +196 val_196 +197 val_197 +197 val_197 +199 val_199 +199 val_199 +199 val_199 +2 val_2 +20 val_20 +200 val_200 +200 val_200 +201 val_201 +202 val_202 +203 val_203 +203 val_203 +205 val_205 +205 val_205 +207 val_207 +207 val_207 +208 val_208 +208 val_208 +208 val_208 +209 val_209 +209 val_209 +213 val_213 +213 val_213 +214 val_214 +216 val_216 +216 val_216 +217 val_217 +217 val_217 +218 val_218 +219 val_219 +219 val_219 +221 val_221 +221 val_221 +222 val_222 +223 val_223 +223 val_223 +224 val_224 +224 val_224 +226 val_226 +228 val_228 +229 val_229 +229 val_229 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +230 val_230 +233 val_233 +233 val_233 +235 val_235 +237 val_237 +237 val_237 +238 val_238 +238 val_238 +239 val_239 +239 val_239 +24 val_24 +24 val_24 +241 val_241 +242 val_242 +242 val_242 +244 val_244 +247 val_247 +248 val_248 +249 val_249 +252 val_252 +255 val_255 +255 val_255 +256 val_256 +256 val_256 +257 val_257 +258 val_258 +26 val_26 +26 val_26 +260 val_260 +262 val_262 +263 val_263 +265 val_265 +265 val_265 +266 val_266 +27 val_27 +272 val_272 +272 val_272 +273 val_273 +273 val_273 +273 val_273 +274 val_274 +275 val_275 +277 val_277 +277 val_277 +277 val_277 +277 val_277 +278 val_278 +278 val_278 +28 val_28 +280 val_280 +280 val_280 +281 val_281 +281 val_281 +282 val_282 +282 val_282 +283 val_283 +284 val_284 +285 val_285 +286 val_286 +287 val_287 +288 val_288 +288 val_288 +289 val_289 +291 val_291 +292 val_292 +296 val_296 +298 val_298 +298 val_298 +298 val_298 +30 val_30 +302 val_302 +305 val_305 +306 val_306 +307 val_307 +307 val_307 +308 val_308 +309 val_309 +309 val_309 +310 val_310 +311 val_311 +311 val_311 +311 val_311 +315 val_315 +316 val_316 +316 val_316 +316 val_316 +317 val_317 +317 val_317 +318 val_318 +318 val_318 +318 val_318 +321 val_321 +321 val_321 +322 val_322 +322 val_322 +323 val_323 +325 val_325 +325 val_325 +327 val_327 +327 val_327 +327 val_327 +33 val_33 +331 val_331 +331 val_331 +332 val_332 +333 val_333 +333 val_333 +335 val_335 +336 val_336 +338 val_338 +339 val_339 +34 val_34 +341 val_341 +342 val_342 +342 val_342 +344 val_344 +344 val_344 +345 val_345 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +348 val_348 +35 val_35 +35 val_35 +35 val_35 +351 val_351 +353 val_353 +353 val_353 +356 val_356 +360 val_360 +362 val_362 +364 val_364 +365 val_365 +366 val_366 +367 val_367 +367 val_367 +368 val_368 +369 val_369 +369 val_369 +369 val_369 +37 val_37 +37 val_37 +373 val_373 +374 val_374 +375 val_375 +377 val_377 +378 val_378 +379 val_379 +382 val_382 +382 val_382 +384 val_384 +384 val_384 +384 val_384 +386 val_386 +389 val_389 +392 val_392 +393 val_393 +394 val_394 +395 val_395 +395 val_395 +396 val_396 +396 val_396 +396 val_396 +397 val_397 +397 val_397 +399 val_399 +399 val_399 +4 val_4 +400 val_400 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +401 val_401 +402 val_402 +403 val_403 +403 val_403 +403 val_403 +404 val_404 +404 val_404 +406 val_406 +406 val_406 +406 val_406 +406 val_406 +407 val_407 +409 val_409 +409 val_409 +409 val_409 +41 val_41 +411 val_411 +413 val_413 +413 val_413 +414 val_414 +414 val_414 +417 val_417 +417 val_417 +417 val_417 +418 val_418 +419 val_419 +42 val_42 +42 val_42 +421 val_421 +424 val_424 +424 val_424 +427 val_427 +429 val_429 +429 val_429 +43 val_43 +430 val_430 +430 val_430 +430 val_430 +431 val_431 +431 val_431 +431 
+432 val_432
+435 val_435
+436 val_436
+437 val_437
+438 val_438
+438 val_438
+438 val_438
+439 val_439
+439 val_439
+44 val_44
+443 val_443
+444 val_444
+446 val_446
+448 val_448
+449 val_449
+452 val_452
+453 val_453
+454 val_454
+454 val_454
+454 val_454
+455 val_455
+457 val_457
+458 val_458
+458 val_458
+459 val_459
+459 val_459
+460 val_460
+462 val_462
+462 val_462
+463 val_463
+463 val_463
+466 val_466
+466 val_466
+466 val_466
+467 val_467
+468 val_468
+468 val_468
+468 val_468
+468 val_468
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+469 val_469
+47 val_47
+470 val_470
+472 val_472
+475 val_475
+477 val_477
+478 val_478
+478 val_478
+479 val_479
+480 val_480
+480 val_480
+480 val_480
+481 val_481
+482 val_482
+483 val_483
+484 val_484
+485 val_485
+487 val_487
+489 val_489
+489 val_489
+489 val_489
+489 val_489
+490 val_490
+491 val_491
+492 val_492
+492 val_492
+493 val_493
+494 val_494
+495 val_495
+496 val_496
+497 val_497
+498 val_498
+498 val_498
+498 val_498
+5 val_5
+5 val_5
+5 val_5
+51 val_51
+51 val_51
+53 val_53
+54 val_54
+57 val_57
+58 val_58
+58 val_58
+64 val_64
+65 val_65
+66 val_66
+67 val_67
+67 val_67
+69 val_69
+70 val_70
+70 val_70
+70 val_70
+72 val_72
+72 val_72
+74 val_74
+76 val_76
+76 val_76
+77 val_77
+78 val_78
+8 val_8
+80 val_80
+82 val_82
+83 val_83
+83 val_83
+84 val_84
+84 val_84
+85 val_85
+86 val_86
+87 val_87
+9 val_9
+90 val_90
+90 val_90
+90 val_90
+92 val_92
+95 val_95
+95 val_95
+96 val_96
+97 val_97
+97 val_97
+98 val_98
+98 val_98
+PREHOOK: query: drop table complex_tbl_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_1
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table complex_tbl_1(aid string, bid string, t int, ctime string, etime bigint, l string, et string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@complex_tbl_1
+PREHOOK: query: drop table complex_tbl_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table complex_tbl_2(aet string, aes string) partitioned by (ds string)
+PREHOOK: type: CREATETABLE
+POSTHOOK: query: create table complex_tbl_2(aet string, aes string) partitioned by (ds string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: default@complex_tbl_2
+PREHOOK: query: explain extended
+insert overwrite table complex_tbl_1 partition (ds='2010-03-29')
+select s2.* from
+(
+ select TRANSFORM (aid,bid,t,ctime,etime,l,et)
+ USING 'cat'
+ AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+ from
+(
+ select transform(aet,aes)
+ using 'cat'
+ as (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+ from complex_tbl_2 where ds ='2010-03-29' cluster by bid
+)s
+)s2
+PREHOOK: type: QUERY
+POSTHOOK: query: explain extended
+insert overwrite table complex_tbl_1 partition (ds='2010-03-29')
+select s2.* from
+(
+ select TRANSFORM (aid,bid,t,ctime,etime,l,et)
+ USING 'cat'
+ AS (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+ from
+(
+ select transform(aet,aes)
+ using 'cat'
+ as (aid string, bid string, t int, ctime string, etime bigint, l string, et string)
+ from complex_tbl_2 where ds ='2010-03-29' cluster by bid
+)s
+)s2
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF complex_tbl_2)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aet) (TOK_TABLE_OR_COL aes)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))) (TOK_WHERE (= (TOK_TABLE_OR_COL ds) '2010-03-29')) (TOK_CLUSTERBY (TOK_TABLE_OR_COL bid)))) s)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (TOK_TABLE_OR_COL aid) (TOK_TABLE_OR_COL bid) (TOK_TABLE_OR_COL t) (TOK_TABLE_OR_COL ctime) (TOK_TABLE_OR_COL etime) (TOK_TABLE_OR_COL l) (TOK_TABLE_OR_COL et)) TOK_SERDE TOK_RECORDWRITER 'cat' TOK_SERDE TOK_RECORDREADER (TOK_TABCOLLIST (TOK_TABCOL aid TOK_STRING) (TOK_TABCOL bid TOK_STRING) (TOK_TABCOL t TOK_INT) (TOK_TABCOL ctime TOK_STRING) (TOK_TABCOL etime TOK_BIGINT) (TOK_TABCOL l TOK_STRING) (TOK_TABCOL et TOK_STRING))))))) s2)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB complex_tbl_1 (TOK_PARTSPEC (TOK_PARTVAL ds '2010-03-29')))) (TOK_SELECT (TOK_SELEXPR (TOK_ALLCOLREF s2)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        s2:s:complex_tbl_2 
+          TableScan
+            alias: complex_tbl_2
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (ds = '2010-03-29')
+                  type: boolean
+              Filter Operator
+                isSamplingPred: false
+                predicate:
+                    expr: (ds = '2010-03-29')
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: aet
+                        type: string
+                        expr: aes
+                        type: string
+                  outputColumnNames: _col0, _col1
+                  Transform Operator
+                    command: cat
+                    output info:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        properties:
+                          columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                          columns.types string,string,int,string,bigint,string,string
+                          field.delim 9
+                          serialization.format 9
+                    Reduce Output Operator
+                      key expressions:
+                            expr: _col1
+                            type: string
+                      sort order: +
+                      Map-reduce partition columns:
+                            expr: _col1
+                            type: string
+                      tag: -1
+                      value expressions:
+                            expr: _col0
+                            type: string
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: int
+                            expr: _col3
+                            type: string
+                            expr: _col4
+                            type: bigint
+                            expr: _col5
+                            type: string
+                            expr: _col6
+                            type: string
+      Needs Tagging: false
+      Reduce Operator Tree:
+        Extract
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: string
+                  expr: _col2
+                  type: int
+                  expr: _col3
+                  type: string
+                  expr: _col4
+                  type: bigint
+                  expr: _col5
+                  type: string
+                  expr: _col6
+                  type: string
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+            Transform Operator
+              command: cat
+              output info:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  properties:
+                    columns _col0,_col1,_col2,_col3,_col4,_col5,_col6
+                    columns.types string,string,int,string,bigint,string,string
+                    field.delim 9
+                    serialization.format 9
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: string
+                      expr: _col1
+                      type: string
+                      expr: _col2
+                      type: int
+                      expr: _col3
+                      type: string
+                      expr: _col4
+                      type: bigint
+                      expr: _col5
+                      type: string
+                      expr: _col6
+                      type: string
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
+                File Output Operator
+                  compressed: false
+                  GlobalTableId: 1
+                  directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-07_812_2407101678291620366/10000
+                  NumFilesPerFileSink: 1
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      properties:
+                        bucket_count -1
+                        columns aid,bid,t,ctime,etime,l,et
+                        columns.types string:string:int:string:bigint:string:string
+                        file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                        file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/complex_tbl_1
+                        name complex_tbl_1
+                        partition_columns ds
+                        serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et}
+                        serialization.format 1
+                        serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        transient_lastDdlTime 1269926347
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                      name: complex_tbl_1
+                  TotalFiles: 1
+                  MultiFileSpray: false
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          partition:
+            ds 2010-03-29
+          replace: true
+          source: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-07_812_2407101678291620366/10000
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              properties:
+                bucket_count -1
+                columns aid,bid,t,ctime,etime,l,et
+                columns.types string:string:int:string:bigint:string:string
+                file.inputformat org.apache.hadoop.mapred.TextInputFormat
+                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                location file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/test/data/warehouse/complex_tbl_1
+                name complex_tbl_1
+                partition_columns ds
+                serialization.ddl struct complex_tbl_1 { string aid, string bid, i32 t, string ctime, i64 etime, string l, string et}
+                serialization.format 1
+                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                transient_lastDdlTime 1269926347
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: complex_tbl_1
+          tmp directory: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-29_22-19-07_812_2407101678291620366/10001
+
+
+PREHOOK: query: drop table complex_tbl_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@complex_tbl_2
+PREHOOK: query: drop table complex_tbl_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table complex_tbl_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@complex_tbl_1
+PREHOOK: query: drop table bucket5_1
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucket5_1
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@bucket5_1