Index: common/src/java/org/apache/hadoop/hive/conf/HiveConf.java =================================================================== --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (revision 907343) +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java (working copy) @@ -187,6 +187,7 @@ HIVEOPTCP("hive.optimize.cp", true), // column pruner HIVEOPTPPD("hive.optimize.ppd", true), // predicate pushdown HIVEOPTGROUPBY("hive.optimize.groupby", true), // optimize group by + HIVEOPTBUCKETMAPJOIN("hive.optimize.bucketmapjoin", false), // optimize bucket map join ; public final String varname; Index: ql/src/java/org/apache/hadoop/hive/ql/exec/BucketMatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/BucketMatcher.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/BucketMatcher.java (revision 0) @@ -0,0 +1,34 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; + +import org.apache.hadoop.fs.Path; + +public interface BucketMatcher { + + public List<Path> getAliasBucketFiles(String currentInputFile, String refTableAlias, String alias); + + public void setAliasBucketFileNameMapping( + LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping); + +} \ No newline at end of file
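The nested map an implementation receives is easier to read with its generics restored: alias -> (big-table bucket file -> matching small-table bucket files). Here is a minimal, self-contained sketch of how that structure is built and consulted; the file names are invented, and plain Strings stand in for org.apache.hadoop.fs.Path:

import java.util.Arrays;
import java.util.LinkedHashMap;
import java.util.List;

public class AliasBucketMappingSketch {
  public static void main(String[] args) {
    // alias -> (big-table bucket file -> matching small-table bucket files)
    LinkedHashMap<String, LinkedHashMap<String, List<String>>> mapping =
        new LinkedHashMap<String, LinkedHashMap<String, List<String>>>();

    // Big table 'a' has 2 buckets, small table 'b' has 4, so each big
    // bucket i pairs with small buckets i and i + 2.
    LinkedHashMap<String, List<String>> forAliasB =
        new LinkedHashMap<String, List<String>>();
    forAliasB.put("/warehouse/a/bucket_0",
        Arrays.asList("/warehouse/b/bucket_0", "/warehouse/b/bucket_2"));
    forAliasB.put("/warehouse/a/bucket_1",
        Arrays.asList("/warehouse/b/bucket_1", "/warehouse/b/bucket_3"));
    mapping.put("b", forAliasB);

    // The lookup getAliasBucketFiles performs, minus the Path wrapping:
    String currentBigFile = "/warehouse/a/bucket_1";
    List<String> files = mapping.get("b").get(currentBigFile);
    System.out.println(files); // [/warehouse/b/bucket_1, /warehouse/b/bucket_3]
  }
}

DefaultBucketMatcher, below, implements exactly this lookup over the real types.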
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultBucketMatcher.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultBucketMatcher.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DefaultBucketMatcher.java (revision 0) @@ -0,0 +1,57 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import java.util.ArrayList; +import java.util.LinkedHashMap; +import java.util.List; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.hadoop.fs.Path; + +public class DefaultBucketMatcher implements BucketMatcher { + + protected Log LOG = LogFactory.getLog(this.getClass().getName()); + + // MAPPING: small_table_alias -> {bucket_file_name_in_big_table -> corresponding_bucket_file_names} + private LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketMapping; + + public DefaultBucketMatcher(){ + } + + public List<Path> getAliasBucketFiles(String refTableInputFile, String refTableAlias, String alias) { + List<String> pathStr = aliasBucketMapping.get(alias).get(refTableInputFile); + List<Path> paths = new ArrayList<Path>(); + if (pathStr != null) { + for (String p : pathStr) { + LOG.info("Loading file " + p + " for " + alias + ". (" + refTableInputFile + ")"); + paths.add(new Path(p)); + } + } + return paths; + } + + public void setAliasBucketFileNameMapping( + LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping) { + this.aliasBucketMapping = aliasBucketFileNameMapping; + } + +} Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (working copy) @@ -25,13 +25,18 @@ import java.net.URLClassLoader; import java.util.Arrays; import java.util.HashMap; +import java.util.Iterator; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; @@ -40,6 +45,7 @@ import org.apache.hadoop.mapred.Mapper; import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; +import org.apache.hadoop.util.ReflectionUtils; public class ExecMapper extends MapReduceBase implements Mapper { @@ -56,6 +62,8 @@ private MemoryMXBean memoryMXBean; private long numRows = 0; private long nextCntr = 1; + private String lastInputFile = null; + private MapredLocalWork localWork = null; @Override public void configure(JobConf job) { @@ -84,7 +92,7 @@ mo.initialize(jc, null); // initialize map local work - MapredLocalWork localWork = mrwork.getMapLocalWork(); + localWork = mrwork.getMapLocalWork(); if (localWork == null) { return; } @@ -127,54 +135,15 @@ rp = reporter; mo.setOutputCollector(oc); mo.setReporter(rp); - // process map local operators - if (fetchOperators != null) { - try { - MapredLocalWork localWork = mo.getConf().getMapLocalWork(); - int fetchOpNum = 0; - for (Map.Entry<String, FetchOperator> entry : fetchOperators - .entrySet()) { - int fetchOpRows = 0; - String alias = entry.getKey(); - FetchOperator fetchOp = entry.getValue(); - Operator<? extends Serializable> forwardOp = localWork - .getAliasToWork().get(alias); - - while (true) { - InspectableObject row = fetchOp.getNextRow(); - if (row == null) { - forwardOp.close(false); - break; - } - fetchOpRows++; - forwardOp.process(row.o, 0); - // check if any operator had a fatal error
or early exit during - // execution - if (forwardOp.getDone()) { - done = true; - break; - } - } - - if (l4j.isInfoEnabled()) { - l4j - .info("fetch " + fetchOpNum++ + " processed " + fetchOpRows - + " used mem: " - + memoryMXBean.getHeapMemoryUsage().getUsed()); - } - } - } catch (Throwable e) { - abort = true; - if (e instanceof OutOfMemoryError) { - // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; - } else { - throw new RuntimeException("Map local work failed", e); - } - } - } } - + + if (localWork != null + && (this.lastInputFile == null || + (localWork.getInputFileChangeSensitive() && inputFileChanged()))) { + this.lastInputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME); + processMapLocalWork(localWork.getInputFileChangeSensitive()); + } + try { if (mo.getDone()) { done = true; @@ -204,6 +173,96 @@ } } + /** + * For CombineFileInputFormat, the mapper's input file can change on the + * fly. If the map local work has any mapping that depends on the current + * mapper's input file, that work needs to clear its context and + * re-initialize after the input file changes. This was first introduced to + * process bucket map joins. + * + * @return true if the mapper's input file has changed since the last call + */ + private boolean inputFileChanged() { + String currentInputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME); + if (this.lastInputFile == null + || !this.lastInputFile.equals(currentInputFile)) { + return true; + } + return false; + } + + private void processMapLocalWork(boolean inputFileChangeSensitive) { + // process map local operators + if (fetchOperators != null) { + try { + int fetchOpNum = 0; + for (Map.Entry<String, FetchOperator> entry : fetchOperators + .entrySet()) { + int fetchOpRows = 0; + String alias = entry.getKey(); + FetchOperator fetchOp = entry.getValue(); + + if (inputFileChangeSensitive) { + fetchOp.clearFetchContext(); + setUpFetchOpContext(fetchOp, alias); + } + + Operator<? extends Serializable> forwardOp = localWork + .getAliasToWork().get(alias); + + while (true) { + InspectableObject row = fetchOp.getNextRow(); + if (row == null) { + forwardOp.close(false); + break; + } + fetchOpRows++; + forwardOp.process(row.o, 0); + // check if any operator had a fatal error or early exit during + // execution + if (forwardOp.getDone()) { + done = true; + break; + } + } + + if (l4j.isInfoEnabled()) { + l4j + .info("fetch " + fetchOpNum++ + " processed " + fetchOpRows + + " used mem: " + + memoryMXBean.getHeapMemoryUsage().getUsed()); + } + } + } catch (Throwable e) { + abort = true; + if (e instanceof OutOfMemoryError) { + // Don't create a new object if we are already out of memory + throw (OutOfMemoryError) e; + } else { + throw new RuntimeException("Map local work failed", e); + } + } + } + } + + private void setUpFetchOpContext(FetchOperator fetchOp, String alias) + throws Exception { + String currentInputFile = HiveConf.getVar(jc, HiveConf.ConfVars.HADOOPMAPFILENAME); + BucketMapJoinContext bucketMatcherCxt = this.localWork.getBucketMapjoinContext(); + Class<? extends BucketMatcher> bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass(); + if (bucketMatcherCls == null) { + bucketMatcherCls = org.apache.hadoop.hive.ql.exec.DefaultBucketMatcher.class; + } + BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(bucketMatcherCls, null); + bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt.getAliasBucketFileNameMapping()); + List<Path> aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile, + bucketMatcherCxt.getMapJoinBigTableAlias(), + alias); + Iterator<Path> iter = aliasFiles.iterator(); + fetchOp.setupContext(iter, null); + }
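Because setUpFetchOpContext instantiates the matcher reflectively and falls back to DefaultBucketMatcher when BucketMapJoinContext carries no explicit class, the matching policy is pluggable. A hypothetical alternative implementation, purely for illustration (the class name and its lenient policy are invented; only the two interface methods and a no-arg constructor are required):

package org.apache.hadoop.hive.ql.exec;

import java.util.ArrayList;
import java.util.LinkedHashMap;
import java.util.List;

import org.apache.hadoop.fs.Path;

// Hypothetical matcher: same lookup as DefaultBucketMatcher, but it
// tolerates aliases with no mapping instead of assuming one exists.
public class LenientBucketMatcher implements BucketMatcher {

  private LinkedHashMap<String, LinkedHashMap<String, List<String>>> mapping;

  public List<Path> getAliasBucketFiles(String currentInputFile,
      String refTableAlias, String alias) {
    List<Path> paths = new ArrayList<Path>();
    LinkedHashMap<String, List<String>> byBigFile = mapping.get(alias);
    if (byBigFile == null || byBigFile.get(currentInputFile) == null) {
      return paths; // no matching buckets: feed the fetch operator nothing
    }
    for (String p : byBigFile.get(currentInputFile)) {
      paths.add(new Path(p));
    }
    return paths;
  }

  public void setAliasBucketFileNameMapping(
      LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping) {
    this.mapping = aliasBucketFileNameMapping;
  }
}

Wiring it in would be a one-liner, bucketMapjoinContext.setBucketMatcherClass(LenientBucketMatcher.class); nothing in this patch sets the class, so ExecMapper always falls back to DefaultBucketMatcher.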
+ + private long getNextCntr(long cntr) { // A very simple counter to keep track of number of rows processed by the // reducer. It dumps Index: ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (revision 907843) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java (working copy) @@ -182,14 +182,18 @@ while (iterPath.hasNext()) { Path nxt = iterPath.next(); - PartitionDesc prt = iterPartDesc.next(); + PartitionDesc prt = null; + if (iterPartDesc != null) + prt = iterPartDesc.next(); FileSystem fs = nxt.getFileSystem(job); if (fs.exists(nxt)) { FileStatus[] fStats = fs.listStatus(nxt); for (FileStatus fStat : fStats) { if (fStat.getLen() > 0) { currPath = nxt; - currPart = prt; + if (iterPartDesc != null) { + currPart = prt; + } return; } } @@ -225,7 +229,7 @@ LOG.debug("Creating fetchTask with deserializer typeinfo: " + serde.getObjectInspector().getTypeName()); LOG.debug("deserializer properties: " + tmp.getProperties()); - if (!tblDataDone) { + if (currPart != null) { setPrtnDesc(); } } @@ -259,10 +263,10 @@ return null; } } - + boolean ret = currRecReader.next(key, value); if (ret) { - if (tblDataDone) { + if (this.currPart == null) { Object obj = serde.deserialize(value); return new InspectableObject(obj, serde.getObjectInspector()); } else { @@ -278,7 +282,7 @@ throw new IOException(e); } } - + /** * Clear the context, if anything needs to be done. * @@ -289,11 +293,32 @@ currRecReader.close(); currRecReader = null; } + this.currPath = null; + this.iterPath = null; + this.iterPartDesc = null; } catch (Exception e) { throw new HiveException("Failed with exception " + e.getMessage() + org.apache.hadoop.util.StringUtils.stringifyException(e)); } } + + /** + * Used for bucket map join. There is a hack for getting the partitionDesc: + * bucket map join currently allows only one partition to be present in each + * join source table. + */ + public void setupContext(Iterator<Path> iterPath, Iterator<PartitionDesc> iterPartDesc) { + this.iterPath = iterPath; + this.iterPartDesc = iterPartDesc; + if (iterPartDesc == null) { + if (work.getTblDir() != null) { + this.currTbl = work.getTblDesc(); + } else { + // hack: get the first partition. + List<PartitionDesc> listParts = work.getPartDesc(); + currPart = listParts.get(0); + } + } + } public ObjectInspector getOutputObjectInspector() throws HiveException { try { Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/BucketMapJoinOptimizer.java (revision 0) @@ -0,0 +1,363 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.optimizer; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.Collections; +import java.util.HashSet; +import java.util.Iterator; +import java.util.LinkedHashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.Stack; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.ql.exec.FunctionRegistry; +import org.apache.hadoop.hive.ql.exec.MapJoinOperator; +import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; +import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker; +import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher; +import org.apache.hadoop.hive.ql.lib.Dispatcher; +import org.apache.hadoop.hive.ql.lib.GraphWalker; +import org.apache.hadoop.hive.ql.lib.Node; +import org.apache.hadoop.hive.ql.lib.NodeProcessor; +import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx; +import org.apache.hadoop.hive.ql.lib.Rule; +import org.apache.hadoop.hive.ql.lib.RuleRegExp; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.metadata.Table; +import org.apache.hadoop.hive.ql.optimizer.ppr.PartitionPruner; +import org.apache.hadoop.hive.ql.parse.ParseContext; +import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; +import org.apache.hadoop.hive.ql.parse.QBJoinTree; +import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.ExprNodeColumnDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc; +import org.apache.hadoop.hive.ql.plan.MapJoinDesc; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; + +/** + * This transformation does bucket map join optimization.
+ */ +public class BucketMapJoinOptimizer implements Transform { + + private static final Log LOG = LogFactory.getLog(BucketMapJoinOptimizer.class + .getName()); + + public BucketMapJoinOptimizer() { + } + + @Override + public ParseContext transform(ParseContext pctx) throws SemanticException { + + Map<Rule, NodeProcessor> opRules = new LinkedHashMap<Rule, NodeProcessor>(); + BucketMapjoinOptProcCtx bucketMapJoinOptimizeCtx = new BucketMapjoinOptProcCtx(); + + // process map joins with no reducers pattern + opRules.put(new RuleRegExp("R1", "MAPJOIN%"), getBucketMapjoinProc(pctx)); + opRules.put(new RuleRegExp("R2", "RS%.*MAPJOIN"), getBucketMapjoinRejectProc(pctx)); + opRules.put(new RuleRegExp("R3", "UNION%.*MAPJOIN%"), + getBucketMapjoinRejectProc(pctx)); + opRules.put(new RuleRegExp("R4", "MAPJOIN%.*MAPJOIN%"), + getBucketMapjoinRejectProc(pctx)); + + // The dispatcher fires the processor corresponding to the closest matching + // rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(getDefaultProc(), opRules, + bucketMapJoinOptimizeCtx); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // Create a list of topop nodes + ArrayList<Node> topNodes = new ArrayList<Node>(); + topNodes.addAll(pctx.getTopOps().values()); + ogw.startWalking(topNodes, null); + + return pctx; + } + + private NodeProcessor getBucketMapjoinRejectProc(ParseContext pctx) { + return new NodeProcessor() { + @Override + public Object process(Node nd, Stack<Node> stack, + NodeProcessorCtx procCtx, Object... nodeOutputs) + throws SemanticException { + MapJoinOperator mapJoinOp = (MapJoinOperator) nd; + BucketMapjoinOptProcCtx context = (BucketMapjoinOptProcCtx) procCtx; + context.listOfRejectedMapjoins.add(mapJoinOp); + return null; + } + }; + } + + private NodeProcessor getBucketMapjoinProc(ParseContext pctx) { + return new BucketMapjoinOptProc(pctx); + } + + private NodeProcessor getDefaultProc() { + return new NodeProcessor() { + @Override + public Object process(Node nd, Stack<Node> stack, + NodeProcessorCtx procCtx, Object... nodeOutputs) + throws SemanticException { + return null; + } + }; + } + + class BucketMapjoinOptProc implements NodeProcessor { + + protected ParseContext pGraphContext; + + public BucketMapjoinOptProc(ParseContext pGraphContext) { + super(); + this.pGraphContext = pGraphContext; + } + + @Override + public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx procCtx, + Object...
nodeOutputs) throws SemanticException { + MapJoinOperator mapJoinOp = (MapJoinOperator) nd; + BucketMapjoinOptProcCtx context = (BucketMapjoinOptProcCtx) procCtx; + + if (context.getListOfRejectedMapjoins().contains(mapJoinOp)) + return null; + + QBJoinTree joinCxt = this.pGraphContext.getMapJoinContext().get(mapJoinOp); + if (joinCxt == null) + return null; + + List<String> joinAliases = new ArrayList<String>(); + String[] srcs = joinCxt.getBaseSrc(); + String[] left = joinCxt.getLeftAliases(); + List<String> mapAlias = joinCxt.getMapAliases(); + String baseBigAlias = null; + for (String s : left) { + if (s != null && !joinAliases.contains(s)) { + joinAliases.add(s); + if (!mapAlias.contains(s)) { + baseBigAlias = s; + } + } + } + for (String s : srcs) { + if (s != null && !joinAliases.contains(s)) { + joinAliases.add(s); + if (!mapAlias.contains(s)) { + baseBigAlias = s; + } + } + } + + MapJoinDesc mjDesc = mapJoinOp.getConf(); + LinkedHashMap<String, Integer> aliasToBucketNumber = new LinkedHashMap<String, Integer>(); + LinkedHashMap<String, List<String>> aliasToBucketFileNames = new LinkedHashMap<String, List<String>>(); + // right now this code does not work with "a join b on a.key = b.key and + // a.ds = b.ds", where ds is a partition column. It only works with joins + // where each join source table has exactly one partition present. + Map<String, Operator<? extends Serializable>> topOps = this.pGraphContext.getTopOps(); + Map<TableScanOperator, Table> topToTable = this.pGraphContext.getTopToTable(); + + List<Integer> bucketNumbers = new ArrayList<Integer>(); + for (int index = 0; index < joinAliases.size(); index++) { + String alias = joinAliases.get(index); + TableScanOperator tso = (TableScanOperator) topOps.get(alias); + Table tbl = topToTable.get(tso); + if (tbl.isPartitioned()) { + PrunedPartitionList prunedParts = null; + try { + prunedParts = PartitionPruner.prune(tbl, pGraphContext.getOpToPartPruner().get(tso), pGraphContext.getConf(), alias, + pGraphContext.getPrunedPartitions()); + } catch (HiveException e) { + // Has to use full name to make sure it does not conflict with + // org.apache.commons.lang.StringUtils + LOG.error(org.apache.hadoop.util.StringUtils.stringifyException(e)); + throw new SemanticException(e.getMessage(), e); + } + int partNumber = prunedParts.getConfirmedPartns().size() + + prunedParts.getUnknownPartns().size(); + if (partNumber > 1) + return null; + + Partition part = null; + Iterator<Partition> iter = prunedParts.getConfirmedPartns() + .iterator(); + while (iter.hasNext()) + part = iter.next(); + if (part == null) { + iter = prunedParts.getUnknownPartns().iterator(); + while (iter.hasNext()) + part = iter.next(); + } + + assert part != null; + + if (!checkBucketColumns(part.getBucketCols(), mjDesc, index)) + return null; + + Integer num = new Integer(part.getBucketCount()); + aliasToBucketNumber.put(alias, num); + List<String> fileNames = new ArrayList<String>(); + try { + FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); + FileStatus[] files = fs.listStatus(new Path(part.getDataLocation() + .toString())); + if (files != null) { + for (FileStatus file : files) { + fileNames.add(file.getPath().toString()); + } + } + } catch (IOException e) { + throw new SemanticException(e); + } + aliasToBucketFileNames.put(alias, fileNames); + } else { + if (!checkBucketColumns(tbl.getBucketCols(), mjDesc, index)) + return null; + Integer num = new Integer(tbl.getNumBuckets()); + aliasToBucketNumber.put(alias, num); + List<String> fileNames = new ArrayList<String>(); + try { + FileSystem fs = FileSystem.get(this.pGraphContext.getConf()); + FileStatus[] files = fs.listStatus(new Path(tbl.getDataLocation().toString())); + if (files != null) { + for (FileStatus file : files) {
fileNames.add(file.getPath().toString()); + } + } + } catch (IOException e) { + throw new SemanticException(e); + } + aliasToBucketFileNames.put(alias, fileNames); + } + } + + // All tables or partitions are bucketed and their bucket numbers are + // stored in 'aliasToBucketNumber'; check that the bucket count of the big + // table and that of each small table divide one another (one must be a + // multiple of the other). + int bucketNoInBigTbl = aliasToBucketNumber.get(baseBigAlias).intValue(); + Iterator<Integer> iter = aliasToBucketNumber.values().iterator(); + while (iter.hasNext()) { + int nxt = iter.next().intValue(); + boolean ok = (nxt >= bucketNoInBigTbl) ? nxt % bucketNoInBigTbl == 0 + : bucketNoInBigTbl % nxt == 0; + if (!ok) + return null; + } + MapJoinDesc desc = mapJoinOp.getConf(); + + LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping = + new LinkedHashMap<String, LinkedHashMap<String, List<String>>>(); + + int bigTblBucketNum = aliasToBucketNumber.get(baseBigAlias); + Collections.sort(aliasToBucketFileNames.get(baseBigAlias)); + for (int j = 0; j < joinAliases.size(); j++) { + String alias = joinAliases.get(j); + if (alias.equals(baseBigAlias)) + continue; + Collections.sort(aliasToBucketFileNames.get(alias)); + LinkedHashMap<String, List<String>> mapping = new LinkedHashMap<String, List<String>>(); + aliasBucketFileNameMapping.put(alias, mapping); + for (String inputBigTBLBucket : aliasToBucketFileNames.get(baseBigAlias)) { + int smallTblBucketNum = aliasToBucketNumber.get(alias); + ArrayList<String> resultFileNames = new ArrayList<String>(); + int index = aliasToBucketFileNames.get(baseBigAlias).indexOf(inputBigTBLBucket); + if (bigTblBucketNum >= smallTblBucketNum) { + int temp = bigTblBucketNum / smallTblBucketNum; + int toAddSmallIndex = index / temp; + if (toAddSmallIndex < aliasToBucketFileNames.get(alias).size()) { + resultFileNames.add(aliasToBucketFileNames.get(alias).get(toAddSmallIndex)); + } + } else { + int jump = smallTblBucketNum / bigTblBucketNum; + for (int i = index; i < aliasToBucketFileNames.get(alias).size(); i = i + jump) { + if (i <= aliasToBucketFileNames.get(alias).size()) { + resultFileNames.add(aliasToBucketFileNames.get(alias).get(i)); + } + } + } + mapping.put(inputBigTBLBucket, resultFileNames); + } + } + desc.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); + desc.setBigTableAlias(baseBigAlias); + return null; + } + + private boolean checkBucketColumns(List<String> bucketColumns, MapJoinDesc mjDesc, int index) { + List<ExprNodeDesc> keys = mjDesc.getKeys().get((byte) index); + if (keys == null || bucketColumns == null || bucketColumns.size() == 0) + return false; + + // get all join columns from join keys stored in MapJoinDesc + List<String> joinCols = new ArrayList<String>(); + List<ExprNodeDesc> joinKeys = new ArrayList<ExprNodeDesc>(); + joinKeys.addAll(keys); + while (joinKeys.size() > 0) { + ExprNodeDesc node = joinKeys.remove(0); + if (node instanceof ExprNodeColumnDesc) { + joinCols.addAll(node.getCols()); + } else if (node instanceof ExprNodeGenericFuncDesc) { + ExprNodeGenericFuncDesc udfNode = ((ExprNodeGenericFuncDesc) node); + GenericUDF udf = udfNode.getGenericUDF(); + if (!FunctionRegistry.isDeterministic(udf)) { + return false; + } + joinKeys.addAll(0, udfNode.getChildExprs()); + } else { + return false; + } + } + + // check that the join columns of a table are exactly the same as its + // bucket columns + if (joinCols.size() == 0 || joinCols.size() != bucketColumns.size()) { + return false; + } + + for (String col : joinCols) { + if (!bucketColumns.contains(col)) + return false; + } + + return true; + } + + }
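A worked example of the mapping the loop above builds: with a 4-bucket big table and a 2-bucket small table, big bucket i pairs with the single small bucket i/2; with a 2-bucket big table and a 4-bucket small table, big bucket i pairs with small buckets i and i+2. A minimal, self-contained sketch of that index arithmetic (class and method names invented for illustration):

import java.util.ArrayList;
import java.util.List;

public class BucketIndexSketch {
  // Mirrors the optimizer's arithmetic: which small-table bucket indexes
  // pair with big-table bucket 'index'?
  static List<Integer> smallBucketsFor(int index, int bigBuckets, int smallBuckets) {
    List<Integer> result = new ArrayList<Integer>();
    if (bigBuckets >= smallBuckets) {
      result.add(index / (bigBuckets / smallBuckets));
    } else {
      int jump = smallBuckets / bigBuckets;
      for (int i = index; i < smallBuckets; i += jump) {
        result.add(i);
      }
    }
    return result;
  }

  public static void main(String[] args) {
    System.out.println(smallBucketsFor(3, 4, 2)); // [1]    : 4 big, 2 small
    System.out.println(smallBucketsFor(1, 2, 4)); // [1, 3] : 2 big, 4 small
  }
}

The divisibility check just before the loop guarantees one of the two branches applies cleanly, i.e. that one bucket count is an exact multiple of the other.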
+ + class BucketMapjoinOptProcCtx implements NodeProcessorCtx { + // we only convert map joins that follow a root table scan in the same + // mapper. That means there is no reducer between the root table scan and + // mapjoin. + Set<MapJoinOperator> listOfRejectedMapjoins = new HashSet<MapJoinOperator>(); + + public Set<MapJoinOperator> getListOfRejectedMapjoins() { + return listOfRejectedMapjoins; + } + + } +} Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/GenMapRedUtils.java (working copy) @@ -60,6 +60,7 @@ import org.apache.hadoop.hive.ql.plan.FileSinkDesc; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.ReduceSinkDesc; @@ -194,6 +195,7 @@ } setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); + setupBucketMapJoinInfo(plan, currMapJoinOp); } else { initUnionPlan(opProcCtx, currTask, false); } @@ -215,6 +217,7 @@ seenOps.add(currTopOp); boolean local = (pos == desc.getPosBigTable()) ? false : true; setTaskPlan(currAliasId, currTopOp, plan, local, opProcCtx); + setupBucketMapJoinInfo(plan, (MapJoinOperator) op); } opProcCtx.setCurrTask(currTask); @@ -222,6 +225,22 @@ opProcCtx.setCurrAliasId(null); } + private static void setupBucketMapJoinInfo(MapredWork plan, + MapJoinOperator currMapJoinOp) { + MapredLocalWork localPlan = plan.getMapLocalWork(); + if (localPlan != null) { + LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping = + currMapJoinOp.getConf().getAliasBucketFileNameMapping(); + if (aliasBucketFileNameMapping != null) { + BucketMapJoinContext bucketMJCxt = new BucketMapJoinContext(); + localPlan.setBucketMapjoinContext(bucketMJCxt); + bucketMJCxt.setAliasBucketFileNameMapping(aliasBucketFileNameMapping); + localPlan.setInputFileChangeSensitive(true); + bucketMJCxt.setMapJoinBigTableAlias(currMapJoinOp.getConf().getBigTableAlias()); + } + } + } + /** * Initialize the current union plan. * @@ -370,6 +389,7 @@ boolean local = ((pos == -1) || (pos == (mjOp.getConf()) .getPosBigTable())) ?
false : true; setTaskPlan(taskTmpDir, taskTmpDir, rootOp, plan, local, tt_desc); + setupBucketMapJoinInfo(plan, oldMapJoin); } opProcCtx.setCurrMapJoinOp(null); @@ -805,6 +825,7 @@ opProcCtx.setMapJoinCtx(mjOp, mjCtx); opProcCtx.getMapCurrCtx().put(parent, new GenMapRedCtx(childTask, null, null)); + setupBucketMapJoinInfo(cplan, mjOp); } currTopOp = null; Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/MapJoinProcessor.java (working copy) @@ -367,6 +367,11 @@ // traverse all the joins and convert them if necessary if (pGraphContext.getJoinContext() != null) { Map<JoinOperator, QBJoinTree> joinMap = new HashMap<JoinOperator, QBJoinTree>(); + Map<MapJoinOperator, QBJoinTree> mapJoinMap = pGraphContext.getMapJoinContext(); + if (mapJoinMap == null) { + mapJoinMap = new HashMap<MapJoinOperator, QBJoinTree>(); + pGraphContext.setMapJoinContext(mapJoinMap); + } Set<Map.Entry<JoinOperator, QBJoinTree>> joinCtx = pGraphContext .getJoinContext().entrySet(); @@ -378,7 +383,9 @@ QBJoinTree qbJoin = joinEntry.getValue(); int mapJoinPos = mapSideJoin(joinOp, qbJoin); if (mapJoinPos >= 0) { - listMapJoinOps.add(convertMapJoin(pactx, joinOp, qbJoin, mapJoinPos)); + MapJoinOperator mapJoinOp = convertMapJoin(pactx, joinOp, qbJoin, mapJoinPos); + listMapJoinOps.add(mapJoinOp); + mapJoinMap.put(mapJoinOp, qbJoin); } else { joinMap.put(joinOp, qbJoin); } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/Optimizer.java (working copy) @@ -56,6 +56,9 @@ transformations.add(new SamplePruner()); transformations.add(new MapJoinProcessor()); + if (HiveConf.getBoolVar(hiveConf, HiveConf.ConfVars.HIVEOPTBUCKETMAPJOIN)) { + transformations.add(new BucketMapJoinOptimizer()); + } transformations.add(new UnionProcessor()); transformations.add(new JoinReorder()); } Index: ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java (working copy) @@ -58,6 +58,7 @@ private HashMap<String, Operator<? extends Serializable>> topSelOps; private LinkedHashMap<Operator<? extends Serializable>, OpParseContext> opParseCtx; private Map<JoinOperator, QBJoinTree> joinContext; + private Map<MapJoinOperator, QBJoinTree> mapJoinContext; private HashMap<TableScanOperator, Table> topToTable; private List<LoadTableDesc> loadTableWork; private List<LoadFileDesc> loadFileWork; @@ -439,4 +440,12 @@ Map<String, PrunedPartitionList> prunedPartitions) { this.prunedPartitions = prunedPartitions; } + + public Map<MapJoinOperator, QBJoinTree> getMapJoinContext() { + return mapJoinContext; + } + + public void setMapJoinContext(Map<MapJoinOperator, QBJoinTree> mapJoinContext) { + this.mapJoinContext = mapJoinContext; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (working copy) @@ -22,6 +22,7 @@ import java.util.ArrayList; import java.util.HashMap; import java.util.Iterator; +import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Set; @@ -42,6 +43,10 @@ private int posBigTable; private Map<Byte, List<Integer>> retainList; + + private transient String bigTableAlias; + + private LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping; public
MapJoinDesc() { } @@ -141,4 +146,27 @@ public void setValueTblDescs(List<TableDesc> valueTblDescs) { this.valueTblDescs = valueTblDescs; } + + /** + * @return bigTableAlias + */ + public String getBigTableAlias() { + return bigTableAlias; + } + + /** + * @param bigTableAlias + */ + public void setBigTableAlias(String bigTableAlias) { + this.bigTableAlias = bigTableAlias; + } + + public LinkedHashMap<String, LinkedHashMap<String, List<String>>> getAliasBucketFileNameMapping() { + return aliasBucketFileNameMapping; + } + + public void setAliasBucketFileNameMapping( + LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping) { + this.aliasBucketFileNameMapping = aliasBucketFileNameMapping; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (revision 907343) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (working copy) @@ -19,9 +19,12 @@ package org.apache.hadoop.hive.ql.plan; import java.io.Serializable; +import java.util.ArrayList; import java.util.LinkedHashMap; +import java.util.List; import org.apache.hadoop.hive.ql.exec.Operator; +import org.apache.hadoop.hive.ql.exec.BucketMatcher; @Explain(displayName = "Map Reduce Local Work") public class MapredLocalWork implements Serializable { @@ -29,7 +32,9 @@ private LinkedHashMap<String, Operator<? extends Serializable>> aliasToWork; private LinkedHashMap<String, FetchWork> aliasToFetchWork; - + private boolean inputFileChangeSensitive; + private BucketMapJoinContext bucketMapjoinContext; + public MapredLocalWork() { } @@ -66,4 +71,58 @@ final LinkedHashMap<String, FetchWork> aliasToFetchWork) { this.aliasToFetchWork = aliasToFetchWork; } + + public boolean getInputFileChangeSensitive() { + return inputFileChangeSensitive; + } + + public void setInputFileChangeSensitive(boolean inputFileChangeSensitive) { + this.inputFileChangeSensitive = inputFileChangeSensitive; + } + + public BucketMapJoinContext getBucketMapjoinContext() { + return bucketMapjoinContext; + } + + public void setBucketMapjoinContext(BucketMapJoinContext bucketMapjoinContext) { + this.bucketMapjoinContext = bucketMapjoinContext; + } + + public static class BucketMapJoinContext implements Serializable { + + private static final long serialVersionUID = 1L; + + // used for bucket map join + private LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping; + private String mapJoinBigTableAlias; + private Class<? extends BucketMatcher> bucketMatcherClass; + + public void setMapJoinBigTableAlias(String bigTableAlias) { + this.mapJoinBigTableAlias = bigTableAlias; + } + + public String getMapJoinBigTableAlias() { + return mapJoinBigTableAlias; + } + + public Class<? extends BucketMatcher> getBucketMatcherClass() { + return bucketMatcherClass; + } + + public void setBucketMatcherClass( + Class<? extends BucketMatcher> bucketMatcherClass) { + this.bucketMatcherClass = bucketMatcherClass; + } + + public LinkedHashMap<String, LinkedHashMap<String, List<String>>> getAliasBucketFileNameMapping() { + return aliasBucketFileNameMapping; + } + + public void setAliasBucketFileNameMapping( + LinkedHashMap<String, LinkedHashMap<String, List<String>>> aliasBucketFileNameMapping) { + this.aliasBucketFileNameMapping = aliasBucketFileNameMapping; + } + + } + } Index: ql/src/test/queries/clientpositive/bucketmapjoin.q =================================================================== --- ql/src/test/queries/clientpositive/bucketmapjoin.q (revision 0) +++ ql/src/test/queries/clientpositive/bucketmapjoin.q (revision 0) @@ -0,0 +1,127 @@ +CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin; +load data
local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin; + +CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08'); + +CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE; +load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); +load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08'); + + +set hive.optimize.bucketmapjoin = true; +create table bucketmapjoin_tmp_result (key string , value1 string, value2 string); + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; + + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08"; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08"; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; + +explain +insert overwrite table 
bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b +on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"; + +select count(1) from bucketmapjoin_tmp_result; + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; + +explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key; + +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin b +on a.key=b.key; + +select count(1) from bucketmapjoin_tmp_result; + +drop table bucketmapjoin_tmp_result; +drop table srcbucket_mapjoin; +drop table srcbucket_mapjoin_part; +drop table srcbucket_mapjoin_part_2; \ No newline at end of file Index: ql/src/test/results/clientpositive/bucketmapjoin.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin.q.out (revision 0) +++ ql/src/test/results/clientpositive/bucketmapjoin.q.out (revision 0) @@ -0,0 +1,1668 @@ +PREHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin(key int, value string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 4 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part +PREHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket20.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE 
srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket21.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: CREATE TABLE srcbucket_mapjoin_part_2 (key int, value string) partitioned by (ds string) CLUSTERED BY (key) INTO 2 BUCKETS STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@srcbucket_mapjoin_part_2 +PREHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket22.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +PREHOOK: type: LOAD +POSTHOOK: query: load data local inpath '../data/files/srcbucket23.txt' INTO TABLE srcbucket_mapjoin_part_2 partition(ds='2008-04-08') +POSTHOOK: type: LOAD +POSTHOOK: Output: default@srcbucket_mapjoin_part_2@ds=2008-04-08 +PREHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table bucketmapjoin_tmp_result (key string , value1 string, value2 string) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-57-40_778_1108047249399561858/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-57-40_778_1108047249399561858/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Reduce Operator Tree: + Extract + File Output 
Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-57-53_083_2700218680145226718/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-57-53_083_2700218680145226718/10000 +464 +PREHOOK: query: explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL b) ds) "2008-04-08")))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2 + Stage-3 + Stage-0 depends on stages: Stage-3, Stage-2 + Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + b + TableScan + alias: b + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + Local Work: + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col3, _col4 + Position of Big Table: 1 + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1, _col3, _col4 + Filter Operator + predicate: + expr: (_col4 = '2008-04-08') + type: boolean + Select Operator + expressions: + expr: _col0 + type: int + expr: _col1 + type: string + expr: _col3 + type: string + outputColumnNames: _col0, _col1, _col2 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + Stage: Stage-4 + Conditional Operator + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-57-58_733_7184512769059027736/10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-57-58_733_7184512769059027736/10002 + Reduce Output Operator + sort order: + Map-reduce partition columns: + expr: rand() + type: double + tag: -1 + value expressions: + expr: key + type: string + expr: value1 + type: string + expr: value2 + type: string + Reduce Operator Tree: + Extract + File Output 
Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: bucketmapjoin_tmp_result + + +PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +PREHOOK: type: QUERY +PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +PREHOOK: Input: default@srcbucket_mapjoin +PREHOOK: Output: default@bucketmapjoin_tmp_result +POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(a)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part b +on a.key=b.key where b.ds="2008-04-08" +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08 +POSTHOOK: Input: default@srcbucket_mapjoin +POSTHOOK: Output: default@bucketmapjoin_tmp_result +PREHOOK: query: select count(1) from bucketmapjoin_tmp_result +PREHOOK: type: QUERY +PREHOOK: Input: default@bucketmapjoin_tmp_result +PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-08_954_5795138339778716410/10000 +POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result +POSTHOOK: type: QUERY +POSTHOOK: Input: default@bucketmapjoin_tmp_result +POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-08_954_5795138339778716410/10000 +464 +PREHOOK: query: explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +PREHOOK: type: QUERY +POSTHOOK: query: explain +insert overwrite table bucketmapjoin_tmp_result +select /*+mapjoin(b)*/ a.key, a.value, b.value +from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b +on a.key=b.key and b.ds="2008-04-08" +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. 
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2
+  Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col3
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                        expr: _col3
+                        type: string
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: bucketmapjoin_tmp_result
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            b
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            b
+              TableScan
+                alias: b
+                Filter Operator
+                  predicate:
+                      expr: (ds = '2008-04-08')
+                      type: boolean
+                  Filter Operator
+                    predicate:
+                        expr: (ds = '2008-04-08')
+                        type: boolean
+                    Common Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {value}
+                      handleSkewJoin: false
+                      keys:
+                        0 [Column[key]]
+                        1 [Column[key]]
+                      outputColumnNames: _col0, _col1, _col3
+                      Position of Big Table: 0
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: int
+                              expr: _col1
+                              type: string
+                              expr: _col3
+                              type: string
+                        outputColumnNames: _col0, _col1, _col3
+                        Select Operator
+                          expressions:
+                                expr: _col0
+                                type: int
+                                expr: _col1
+                                type: string
+                                expr: _col3
+                                type: string
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: bucketmapjoin_tmp_result
+
+  Stage: Stage-4
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-13_327_6313157024627373650/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucketmapjoin_tmp_result
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-13_327_6313157024627373650/10002
+          Reduce Output Operator
+            sort order:
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: key
+                  type: string
+                  expr: value1
+                  type: string
+                  expr: value2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: bucketmapjoin_tmp_result
+
+
+PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b
+on a.key=b.key and b.ds="2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Output: default@bucketmapjoin_tmp_result
+POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b
+on a.key=b.key and b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketmapjoin_tmp_result
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-22_477_7746276070578688929/10000
+POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketmapjoin_tmp_result
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-22_477_7746276070578688929/10000
+0
+PREHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b
+on a.key=b.key and b.ds="2008-04-08"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b
+on a.key=b.key and b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2
+  Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (ds = '2008-04-08')
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (ds = '2008-04-08')
+                    type: boolean
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {value}
+                    1 {value}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0, _col1, _col3
+                  Position of Big Table: 1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+                          expr: _col3
+                          type: string
+                    outputColumnNames: _col0, _col1, _col3
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col3
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: bucketmapjoin_tmp_result
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            a
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            a
+              TableScan
+                alias: a
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {value}
+                    1 {value}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0, _col1, _col3
+                  Position of Big Table: 1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+                          expr: _col3
+                          type: string
+                    outputColumnNames: _col0, _col1, _col3
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col3
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: bucketmapjoin_tmp_result
+
+  Stage: Stage-4
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-27_094_4453494437323431447/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucketmapjoin_tmp_result
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-27_094_4453494437323431447/10002
+          Reduce Output Operator
+            sort order:
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: key
+                  type: string
+                  expr: value1
+                  type: string
+                  expr: value2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: bucketmapjoin_tmp_result
+
+
+PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b
+on a.key=b.key and b.ds="2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Output: default@bucketmapjoin_tmp_result
+POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin_part_2 b
+on a.key=b.key and b.ds="2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketmapjoin_tmp_result
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-35_471_7350800153357516460/10000
+POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketmapjoin_tmp_result
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-35_471_7350800153357516460/10000
+0
+PREHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin_part_2 a) (TOK_TABREF srcbucket_mapjoin_part b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2
+  Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (ds = '2008-04-08')
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (ds = '2008-04-08')
+                    type: boolean
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {value}
+                    1 {value}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0, _col1, _col4
+                  Position of Big Table: 0
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+                          expr: _col4
+                          type: string
+                    outputColumnNames: _col0, _col1, _col4
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col4
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: bucketmapjoin_tmp_result
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            b
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            b
+              TableScan
+                alias: b
+                Filter Operator
+                  predicate:
+                      expr: (ds = '2008-04-08')
+                      type: boolean
+                  Filter Operator
+                    predicate:
+                        expr: (ds = '2008-04-08')
+                        type: boolean
+                    Common Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {value}
+                      handleSkewJoin: false
+                      keys:
+                        0 [Column[key]]
+                        1 [Column[key]]
+                      outputColumnNames: _col0, _col1, _col4
+                      Position of Big Table: 0
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: int
+                              expr: _col1
+                              type: string
+                              expr: _col4
+                              type: string
+                        outputColumnNames: _col0, _col1, _col4
+                        Select Operator
+                          expressions:
+                                expr: _col0
+                                type: int
+                                expr: _col1
+                                type: string
+                                expr: _col4
+                                type: string
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: bucketmapjoin_tmp_result
+
+  Stage: Stage-4
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-39_531_2043357852187421351/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucketmapjoin_tmp_result
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-39_531_2043357852187421351/10002
+          Reduce Output Operator
+            sort order:
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: key
+                  type: string
+                  expr: value1
+                  type: string
+                  expr: value2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: bucketmapjoin_tmp_result
+
+
+PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+PREHOOK: Output: default@bucketmapjoin_tmp_result
+POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketmapjoin_tmp_result
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-48_670_7340348838463302043/10000
+POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketmapjoin_tmp_result
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-48_670_7340348838463302043/10000
+564
+PREHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin_part_2 a) (TOK_TABREF srcbucket_mapjoin_part b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2
+  Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (ds = '2008-04-08')
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: (ds = '2008-04-08')
+                    type: boolean
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {value}
+                    1 {value}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0, _col1, _col4
+                  Position of Big Table: 1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+                          expr: _col4
+                          type: string
+                    outputColumnNames: _col0, _col1, _col4
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col4
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: bucketmapjoin_tmp_result
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            a
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            a
+              TableScan
+                alias: a
+                Filter Operator
+                  predicate:
+                      expr: (ds = '2008-04-08')
+                      type: boolean
+                  Filter Operator
+                    predicate:
+                        expr: (ds = '2008-04-08')
+                        type: boolean
+                    Common Join Operator
+                      condition map:
+                           Inner Join 0 to 1
+                      condition expressions:
+                        0 {key} {value}
+                        1 {value}
+                      handleSkewJoin: false
+                      keys:
+                        0 [Column[key]]
+                        1 [Column[key]]
+                      outputColumnNames: _col0, _col1, _col4
+                      Position of Big Table: 1
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: int
+                              expr: _col1
+                              type: string
+                              expr: _col4
+                              type: string
+                        outputColumnNames: _col0, _col1, _col4
+                        Select Operator
+                          expressions:
+                                expr: _col0
+                                type: int
+                                expr: _col1
+                                type: string
+                                expr: _col4
+                                type: string
+                          outputColumnNames: _col0, _col1, _col2
+                          File Output Operator
+                            compressed: false
+                            GlobalTableId: 1
+                            table:
+                                input format: org.apache.hadoop.mapred.TextInputFormat
+                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                                name: bucketmapjoin_tmp_result
+
+  Stage: Stage-4
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-52_654_269080650402704572/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucketmapjoin_tmp_result
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-58-52_654_269080650402704572/10002
+          Reduce Output Operator
+            sort order:
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: key
+                  type: string
+                  expr: value1
+                  type: string
+                  expr: value2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: bucketmapjoin_tmp_result
+
+
+PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+PREHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+PREHOOK: Output: default@bucketmapjoin_tmp_result
+POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin_part_2 a join srcbucket_mapjoin_part b
+on a.key=b.key and b.ds="2008-04-08" and a.ds="2008-04-08"
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin_part@ds=2008-04-08
+POSTHOOK: Input: default@srcbucket_mapjoin_part_2@ds=2008-04-08
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketmapjoin_tmp_result
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-06_930_661733139608937419/10000
+POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketmapjoin_tmp_result
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-06_930_661733139608937419/10000
+564
+PREHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2
+  Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col3
+              Position of Big Table: 0
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                        expr: _col3
+                        type: string
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: bucketmapjoin_tmp_result
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            b
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            b
+              TableScan
+                alias: b
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {value}
+                    1 {value}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0, _col1, _col3
+                  Position of Big Table: 0
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+                          expr: _col3
+                          type: string
+                    outputColumnNames: _col0, _col1, _col3
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col3
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: bucketmapjoin_tmp_result
+
+  Stage: Stage-4
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-12_628_4635683324239859589/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucketmapjoin_tmp_result
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-12_628_4635683324239859589/10002
+          Reduce Output Operator
+            sort order:
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: key
+                  type: string
+                  expr: value1
+                  type: string
+                  expr: value2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: bucketmapjoin_tmp_result
+
+
+PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Output: default@bucketmapjoin_tmp_result
+POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(b)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketmapjoin_tmp_result
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-28_617_6272356144491919166/10000
+POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketmapjoin_tmp_result
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-28_617_6272356144491919166/10000
+464
+PREHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+PREHOOK: type: QUERY
+POSTHOOK: query: explain
+insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-4 depends on stages: Stage-1 , consists of Stage-3, Stage-2
+  Stage-3
+  Stage-0 depends on stages: Stage-3, Stage-2
+  Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b
+          TableScan
+            alias: b
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col3
+              Position of Big Table: 1
+              Select Operator
+                expressions:
+                      expr: _col0
+                      type: int
+                      expr: _col1
+                      type: string
+                      expr: _col3
+                      type: string
+                outputColumnNames: _col0, _col1, _col3
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: int
+                        expr: _col1
+                        type: string
+                        expr: _col3
+                        type: string
+                  outputColumnNames: _col0, _col1, _col2
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                        serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                        name: bucketmapjoin_tmp_result
+      Local Work:
+        Map Reduce Local Work
+          Alias -> Map Local Tables:
+            a
+              Fetch Operator
+                limit: -1
+          Alias -> Map Local Operator Tree:
+            a
+              TableScan
+                alias: a
+                Common Join Operator
+                  condition map:
+                       Inner Join 0 to 1
+                  condition expressions:
+                    0 {key} {value}
+                    1 {value}
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[key]]
+                  outputColumnNames: _col0, _col1, _col3
+                  Position of Big Table: 1
+                  Select Operator
+                    expressions:
+                          expr: _col0
+                          type: int
+                          expr: _col1
+                          type: string
+                          expr: _col3
+                          type: string
+                    outputColumnNames: _col0, _col1, _col3
+                    Select Operator
+                      expressions:
+                            expr: _col0
+                            type: int
+                            expr: _col1
+                            type: string
+                            expr: _col3
+                            type: string
+                      outputColumnNames: _col0, _col1, _col2
+                      File Output Operator
+                        compressed: false
+                        GlobalTableId: 1
+                        table:
+                            input format: org.apache.hadoop.mapred.TextInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                            name: bucketmapjoin_tmp_result
+
+  Stage: Stage-4
+    Conditional Operator
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-36_117_6268241397677123679/10000
+
+  Stage: Stage-0
+    Move Operator
+      tables:
+          replace: true
+          table:
+              input format: org.apache.hadoop.mapred.TextInputFormat
+              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+              name: bucketmapjoin_tmp_result
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-36_117_6268241397677123679/10002
+          Reduce Output Operator
+            sort order:
+            Map-reduce partition columns:
+                  expr: rand()
+                  type: double
+            tag: -1
+            value expressions:
+                  expr: key
+                  type: string
+                  expr: value1
+                  type: string
+                  expr: value2
+                  type: string
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+                name: bucketmapjoin_tmp_result
+
+
+PREHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcbucket_mapjoin
+PREHOOK: Output: default@bucketmapjoin_tmp_result
+POSTHOOK: query: insert overwrite table bucketmapjoin_tmp_result
+select /*+mapjoin(a)*/ a.key, a.value, b.value
+from srcbucket_mapjoin a join srcbucket_mapjoin b
+on a.key=b.key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcbucket_mapjoin
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: select count(1) from bucketmapjoin_tmp_result
+PREHOOK: type: QUERY
+PREHOOK: Input: default@bucketmapjoin_tmp_result
+PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-49_417_299500374370150738/10000
+POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@bucketmapjoin_tmp_result
+POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive-Test/build/ql/scratchdir/hive_2010-02-08_15-59-49_417_299500374370150738/10000
+464
+PREHOOK: query: drop table bucketmapjoin_tmp_result
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table bucketmapjoin_tmp_result
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@bucketmapjoin_tmp_result
+PREHOOK: query: drop table srcbucket_mapjoin
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table srcbucket_mapjoin
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@srcbucket_mapjoin
+PREHOOK: query: drop table srcbucket_mapjoin_part
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table srcbucket_mapjoin_part
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part
+PREHOOK: query: drop table srcbucket_mapjoin_part_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table srcbucket_mapjoin_part_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Output: default@srcbucket_mapjoin_part_2
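
Reviewer note: every golden file above exercises the same pattern, a MAPJOIN-hinted join
over bucketed tables with the new optimization flag turned on. As a minimal illustrative
sketch only (the column types, bucket counts, and STORED AS clauses below are assumptions
for readability, not copied from the corresponding .q test file), the setup behind these
outputs looks roughly like:

    set hive.optimize.bucketmapjoin = true;

    -- small bucketed table (assumed DDL, for illustration only)
    create table srcbucket_mapjoin (key int, value string)
    clustered by (key) into 2 buckets stored as textfile;

    -- large bucketed, partitioned table (assumed DDL, for illustration only)
    create table srcbucket_mapjoin_part (key int, value string)
    partitioned by (ds string)
    clustered by (key) into 4 buckets stored as textfile;

    -- the hint marks the small table as the map-side table; with the flag on,
    -- the intent is that each mapper loads only the small-table bucket files
    -- whose bucket number matches the big-table split it is reading
    insert overwrite table bucketmapjoin_tmp_result
    select /*+mapjoin(a)*/ a.key, a.value, b.value
    from srcbucket_mapjoin a join srcbucket_mapjoin_part b
    on a.key = b.key where b.ds = "2008-04-08";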