Index: ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java
===================================================================
--- ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java (revision 1023076)
+++ ql/src/gen-javabean/org/apache/hadoop/hive/ql/plan/api/OperatorType.java (working copy)
@@ -30,24 +30,28 @@
   public static final int UDTF = 13;
   public static final int LATERALVIEWJOIN = 14;
   public static final int LATERALVIEWFORWARD = 15;
+  public static final int JDBMDUMMY = 16;
+  public static final int JDBMSINK = 17;
 
   public static final IntRangeSet VALID_VALUES = new IntRangeSet(
-    JOIN,
-    MAPJOIN,
-    EXTRACT,
-    FILTER,
-    FORWARD,
-    GROUPBY,
-    LIMIT,
-    SCRIPT,
-    SELECT,
-    TABLESCAN,
-    FILESINK,
-    REDUCESINK,
-    UNION,
-    UDTF,
-    LATERALVIEWJOIN,
-    LATERALVIEWFORWARD );
+    JOIN,
+    MAPJOIN,
+    EXTRACT,
+    FILTER,
+    FORWARD,
+    GROUPBY,
+    LIMIT,
+    SCRIPT,
+    SELECT,
+    TABLESCAN,
+    FILESINK,
+    REDUCESINK,
+    UNION,
+    UDTF,
+    LATERALVIEWJOIN,
+    LATERALVIEWFORWARD,
+    JDBMDUMMY,
+    JDBMSINK);
 
   public static final Map<Integer, String> VALUES_TO_NAMES = new HashMap<Integer, String>() {{
     put(JOIN, "JOIN");
@@ -66,5 +70,7 @@
     put(UDTF, "UDTF");
     put(LATERALVIEWJOIN, "LATERALVIEWJOIN");
     put(LATERALVIEWFORWARD, "LATERALVIEWFORWARD");
+    put(JDBMDUMMY, "JDBMDUMMY");
+    put(JDBMSINK, "JDBMSINK");
   }};
 }
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (revision 1023076)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/AbstractMapJoinOperator.java (working copy)
@@ -30,6 +30,7 @@
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.util.JoinUtil;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
 import org.apache.hadoop.hive.serde2.objectinspector.StructField;
@@ -68,6 +69,8 @@
 
   transient boolean firstRow;
 
+  private static final int NOTSKIPBIGTABLE = -1;
+
   public AbstractMapJoinOperator() {
   }
 
@@ -84,17 +87,19 @@
 
     joinKeys = new HashMap<Byte, List<ExprNodeEvaluator>>();
 
-    populateJoinKeyValue(joinKeys, conf.getKeys());
-    joinKeysObjectInspectors = getObjectInspectorsFromEvaluators(joinKeys,
-        inputObjInspectors);
-    joinKeysStandardObjectInspectors = getStandardObjectInspectors(joinKeysObjectInspectors);
+    JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(), order, NOTSKIPBIGTABLE);
+    joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys,
+        inputObjInspectors, NOTSKIPBIGTABLE);
+    joinKeysStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(
+        joinKeysObjectInspectors, NOTSKIPBIGTABLE);
 
     // all other tables are small, and are cached in the hash table
     posBigTable = conf.getPosBigTable();
 
     emptyList = new RowContainer<ArrayList<Object>>(1, hconf);
-    RowContainer bigPosRC = getRowContainer(hconf, (byte) posBigTable,
-        order[posBigTable], joinCacheSize);
+    RowContainer bigPosRC = JoinUtil.getRowContainer(hconf,
+        rowContainerStandardObjectInspectors.get((byte) posBigTable),
+        order[posBigTable], joinCacheSize, spillTableDesc, conf, noOuterJoin);
     storage.put((byte) posBigTable, bigPosRC);
 
     mapJoinRowsKey = HiveConf.getIntVar(hconf,
@@ -120,6 +125,7 @@
     initializeChildren(hconf);
   }
 
+  @Override
   protected void fatalErrorMessage(StringBuilder errMsg, long counterCode) {
     errMsg.append("Operator " + getOperatorId() + " (id=" + id + "): "
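The refactoring visible above recurs through the rest of this patch: helpers that used to be protected methods of CommonJoinOperator (populateJoinKeyValue, getObjectInspectorsFromEvaluators, getStandardObjectInspectors, getRowContainer) become static methods on a new JoinUtil class, each taking the tag order plus a skip tag so that the new JDBMSinkOperator can build evaluators for the small tables only. Existing operators pass NOTSKIPBIGTABLE (-1) to keep the old behavior. JoinUtil.java itself is not part of this excerpt; a minimal sketch of the refactored helper, inferred from the version this patch removes from CommonJoinOperator below, might read:

    // Sketch only -- inferred from the code removed from CommonJoinOperator;
    // the actual JoinUtil in the patch may differ in details.
    public static int populateJoinKeyValue(Map<Byte, List<ExprNodeEvaluator>> outMap,
        Map<Byte, List<ExprNodeDesc>> inputMap, Byte[] order, int posBigTableAlias) {
      int total = 0;
      for (Map.Entry<Byte, List<ExprNodeDesc>> e : inputMap.entrySet()) {
        Byte key = order[e.getKey()];
        // with NOTSKIPBIGTABLE (-1) nothing matches, so nothing is skipped
        if (key.intValue() == posBigTableAlias) {
          continue;
        }
        List<ExprNodeEvaluator> valueFields = new ArrayList<ExprNodeEvaluator>();
        for (ExprNodeDesc desc : e.getValue()) {
          valueFields.add(ExprNodeEvaluatorFactory.get(desc));
        }
        total += valueFields.size();
        outMap.put(key, valueFields);
      }
      return total;
    }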
Index: ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/CommonJoinOperator.java (working copy) @@ -25,33 +25,24 @@ import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Map.Entry; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; -import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.JoinCondDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; -import org.apache.hadoop.hive.serde2.SerDe; -import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.ql.util.JoinUtil; import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.mapred.SequenceFileInputFormat; -import org.apache.hadoop.util.ReflectionUtils; /** * Join operator implementation. 
@@ -127,10 +118,10 @@ protected transient Map> rowContainerStandardObjectInspectors; - protected static transient Byte[] order; // order in which the results should + protected transient Byte[] order; // order in which the results should // be output protected transient JoinCondDesc[] condn; - protected transient boolean noOuterJoin; + public transient boolean noOuterJoin; protected transient Object[] dummyObj; // for outer joins, contains the // potential nulls for the concerned // aliases @@ -163,6 +154,7 @@ protected transient int countAfterReport; protected transient int heartbeatInterval; + private static final int NOTSKIPBIGTABLE = -1; public CommonJoinOperator() { } @@ -179,9 +171,8 @@ this.operatorId = clone.operatorId; this.storage = clone.storage; this.condn = clone.condn; - + this.conf = clone.getConf(); this.setSchema(clone.getSchema()); - this.alias = clone.alias; this.beginTime = clone.beginTime; this.inputRows = clone.inputRows; @@ -208,68 +199,6 @@ this.joinFilterObjectInspectors = clone.joinFilterObjectInspectors; } - protected int populateJoinKeyValue(Map> outMap, - Map> inputMap) { - - int total = 0; - - Iterator>> entryIter = inputMap - .entrySet().iterator(); - while (entryIter.hasNext()) { - Map.Entry> e = entryIter.next(); - Byte key = order[e.getKey()]; - - List expr = e.getValue(); - int sz = expr.size(); - total += sz; - - List valueFields = new ArrayList(); - - for (int j = 0; j < sz; j++) { - valueFields.add(ExprNodeEvaluatorFactory.get(expr.get(j))); - } - - outMap.put(key, valueFields); - } - - return total; - } - - protected static HashMap> getObjectInspectorsFromEvaluators( - Map> exprEntries, - ObjectInspector[] inputObjInspector) throws HiveException { - HashMap> result = new HashMap>(); - for (Entry> exprEntry : exprEntries - .entrySet()) { - Byte alias = exprEntry.getKey(); - List exprList = exprEntry.getValue(); - ArrayList fieldOIList = new ArrayList(); - for (int i = 0; i < exprList.size(); i++) { - fieldOIList.add(exprList.get(i).initialize(inputObjInspector[alias])); - } - result.put(alias, fieldOIList); - } - return result; - } - - protected static HashMap> getStandardObjectInspectors( - Map> aliasToObjectInspectors) { - HashMap> result = new HashMap>(); - for (Entry> oiEntry : aliasToObjectInspectors - .entrySet()) { - Byte alias = oiEntry.getKey(); - List oiList = oiEntry.getValue(); - ArrayList fieldOIList = new ArrayList( - oiList.size()); - for (int i = 0; i < oiList.size(); i++) { - fieldOIList.add(ObjectInspectorUtils.getStandardObjectInspector(oiList - .get(i), ObjectInspectorCopyOption.WRITABLE)); - } - result.put(alias, fieldOIList); - } - return result; - - } protected static ObjectInspector getJoinOutputObjectInspector( Byte[] order, Map> aliasToObjectInspectors, @@ -307,20 +236,26 @@ joinFilters = new HashMap>(); - if (order == null) { - order = conf.getTagOrder(); - } + order = conf.getTagOrder(); condn = conf.getConds(); noOuterJoin = conf.isNoOuterJoin(); - totalSz = populateJoinKeyValue(joinValues, conf.getExprs()); - populateJoinKeyValue(joinFilters, conf.getFilters()); + + + totalSz = JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(), + order,NOTSKIPBIGTABLE); + + //process join filters + joinFilters = new HashMap>(); + JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,NOTSKIPBIGTABLE); + - joinValuesObjectInspectors = getObjectInspectorsFromEvaluators(joinValues, - inputObjInspectors); - joinFilterObjectInspectors = getObjectInspectorsFromEvaluators(joinFilters, - inputObjInspectors); - 
joinValuesStandardObjectInspectors = getStandardObjectInspectors(joinValuesObjectInspectors); + joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues, + inputObjInspectors,NOTSKIPBIGTABLE); + joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinFilters, + inputObjInspectors,NOTSKIPBIGTABLE); + joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors( + joinValuesObjectInspectors,NOTSKIPBIGTABLE); if (noOuterJoin) { rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors; @@ -336,9 +271,12 @@ rowContainerObjectInspectors.put(alias, rcOIs); } rowContainerStandardObjectInspectors = - getStandardObjectInspectors(rowContainerObjectInspectors); + JoinUtil.getStandardObjectInspectors(rowContainerObjectInspectors,NOTSKIPBIGTABLE); } + + + dummyObj = new Object[numAliases]; dummyObjVectors = new RowContainer[numAliases]; @@ -367,14 +305,18 @@ } dummyObj[pos] = nr; // there should be only 1 dummy object in the RowContainer - RowContainer> values = getRowContainer(hconf, pos, - alias, 1); + RowContainer> values = JoinUtil.getRowContainer(hconf, + rowContainerStandardObjectInspectors.get((byte)pos), + alias, 1, spillTableDesc, conf, noOuterJoin); + values.add((ArrayList) dummyObj[pos]); dummyObjVectors[pos] = values; // if serde is null, the input doesn't need to be spilled out // e.g., the output columns does not contains the input table - RowContainer rc = getRowContainer(hconf, pos, alias, joinCacheSize); + RowContainer rc = JoinUtil.getRowContainer(hconf, + rowContainerStandardObjectInspectors.get((byte)pos), + alias, joinCacheSize,spillTableDesc, conf,noOuterJoin); storage.put(pos, rc); pos++; @@ -390,103 +332,10 @@ } - RowContainer getRowContainer(Configuration hconf, byte pos, Byte alias, - int containerSize) throws HiveException { - TableDesc tblDesc = getSpillTableDesc(alias); - SerDe serde = getSpillSerDe(alias); - if (serde == null) { - containerSize = 1; - } - RowContainer rc = new RowContainer(containerSize, hconf); - StructObjectInspector rcOI = null; - if (tblDesc != null) { - // arbitrary column names used internally for serializing to spill table - List colNames = Utilities.getColumnNames(tblDesc.getProperties()); - // object inspector for serializing input tuples - rcOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, - rowContainerStandardObjectInspectors.get(pos)); - } - - rc.setSerDe(serde, rcOI); - rc.setTableDesc(tblDesc); - return rc; - } - private SerDe getSpillSerDe(byte alias) { - TableDesc desc = getSpillTableDesc(alias); - if (desc == null) { - return null; - } - SerDe sd = (SerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), - null); - try { - sd.initialize(null, desc.getProperties()); - } catch (SerDeException e) { - e.printStackTrace(); - return null; - } - return sd; - } - - transient boolean newGroupStarted = false; - - public TableDesc getSpillTableDesc(Byte alias) { - if (spillTableDesc == null || spillTableDesc.size() == 0) { - initSpillTables(); - } - return spillTableDesc.get(alias); - } - - public Map getSpillTableDesc() { - if (spillTableDesc == null) { - initSpillTables(); - } - return spillTableDesc; - } - - private void initSpillTables() { - Map> exprs = conf.getExprs(); - spillTableDesc = new HashMap(exprs.size()); - for (int tag = 0; tag < exprs.size(); tag++) { - List valueCols = exprs.get((byte) tag); - int columnSize = valueCols.size(); - StringBuilder colNames = new StringBuilder(); - StringBuilder colTypes = new StringBuilder(); - if 
(columnSize <= 0) { - continue; - } - for (int k = 0; k < columnSize; k++) { - String newColName = tag + "_VALUE_" + k; // any name, it does not - // matter. - colNames.append(newColName); - colNames.append(','); - colTypes.append(valueCols.get(k).getTypeString()); - colTypes.append(','); - } - if (!noOuterJoin) { - colNames.append("filtered"); - colNames.append(','); - colTypes.append(TypeInfoFactory.booleanTypeInfo.getTypeName()); - colTypes.append(','); - } - // remove the last ',' - colNames.setLength(colNames.length() - 1); - colTypes.setLength(colTypes.length() - 1); - TableDesc tblDesc = new TableDesc(LazyBinarySerDe.class, - SequenceFileInputFormat.class, HiveSequenceFileOutputFormat.class, - Utilities.makeProperties( - org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" - + Utilities.ctrlaCode, - org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, colNames - .toString(), - org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, - colTypes.toString())); - spillTableDesc.put((byte) tag, tblDesc); - } - } - +transient boolean newGroupStarted = false; @Override public void startGroup() throws HiveException { LOG.trace("Join: Starting new group"); @@ -507,50 +356,6 @@ protected transient Byte alias; - /** - * Return the key as a standard object. StandardObject can be inspected by a - * standard ObjectInspector. - */ - protected static ArrayList computeKeys(Object row, - List keyFields, List keyFieldsOI) - throws HiveException { - - // Compute the keys - ArrayList nr = new ArrayList(keyFields.size()); - for (int i = 0; i < keyFields.size(); i++) { - - nr.add(ObjectInspectorUtils.copyToStandardObject(keyFields.get(i) - .evaluate(row), keyFieldsOI.get(i), - ObjectInspectorCopyOption.WRITABLE)); - } - - return nr; - } - - /** - * Return the value as a standard object. StandardObject can be inspected by a - * standard ObjectInspector. - */ - protected static ArrayList computeValues(Object row, - List valueFields, List valueFieldsOI, - List filters, List filtersOI, - boolean noOuterJoin) throws HiveException { - - // Compute the values - ArrayList nr = new ArrayList(valueFields.size()); - for (int i = 0; i < valueFields.size(); i++) { - nr.add(ObjectInspectorUtils.copyToStandardObject(valueFields.get(i) - .evaluate(row), valueFieldsOI.get(i), - ObjectInspectorCopyOption.WRITABLE)); - } - if (!noOuterJoin) { - // add whether the row is filtered or not. 
- nr.add(new BooleanWritable(isFiltered(row, filters, filtersOI))); - } - - return nr; - } - transient Object[] forwardCache; private void createForwardJoinObject(IntermediateObject intObj, Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java (working copy) @@ -43,6 +43,8 @@ import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.filecache.DistributedCache; +import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; @@ -81,12 +83,12 @@ import org.apache.hadoop.mapred.Partitioner; import org.apache.hadoop.mapred.RunningJob; import org.apache.hadoop.mapred.TaskCompletionEvent; +import org.apache.log4j.Appender; import org.apache.log4j.BasicConfigurator; +import org.apache.log4j.FileAppender; import org.apache.log4j.LogManager; import org.apache.log4j.PropertyConfigurator; import org.apache.log4j.varia.NullAppender; -import org.apache.log4j.FileAppender; -import org.apache.log4j.Appender; /** * ExecDriver. @@ -589,8 +591,45 @@ HiveConf.setVar(job, HiveConf.ConfVars.HADOOPJOBNAME, "JOB" + Utilities.randGen.nextInt()); } + try { + //propagate the file to distributed cache + MapredLocalWork localwork =work.getMapLocalWork(); + if(localwork != null){ + boolean localMode = HiveConf.getVar(job, HiveConf.ConfVars.HADOOPJT).equals("local"); + if(!localMode){ + LOG.info("=========Adding files to HDFS ================"); + Path localPath = new Path(localwork.getTmpFileURI()); + Path hdfsPath = new Path(work.getTmpHDFSFileURI()); + + FileSystem hdfs = hdfsPath.getFileSystem(job); + FileSystem localFS = localPath.getFileSystem(job); + FileStatus[] jdbmFiles = localFS.listStatus(localPath); + for(int i =0; i> pathToAliases = work - .getPathToAliases(); + + LinkedHashMap> pathToAliases = work.getPathToAliases(); + if (isEmptyPath) { assert path != null; pathToAliases.put(newPath.toUri().toString(), pathToAliases.get(path)); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapper.java (working copy) @@ -23,18 +23,14 @@ import java.lang.management.ManagementFactory; import java.lang.management.MemoryMXBean; import java.net.URLClassLoader; -import java.util.ArrayList; import java.util.Arrays; -import java.util.HashMap; +import java.util.List; import java.util.Map; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; -import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.MapReduceBase; @@ -42,7 +38,6 @@ import org.apache.hadoop.mapred.OutputCollector; import org.apache.hadoop.mapred.Reporter; import org.apache.hadoop.util.StringUtils; - /** * ExecMapper. 
* @@ -91,62 +86,28 @@ // initialize map operator mo.setChildren(job); l4j.info(mo.dump(0)); + // initialize map local work + localWork = mrwork.getMapLocalWork(); + execContext.setLocalWork(localWork); + mo.setExecContext(execContext); mo.initializeLocalWork(jc); mo.initialize(jc, null); - // initialize map local work - localWork = mrwork.getMapLocalWork(); if (localWork == null) { return; } - fetchOperators = new HashMap(); - - Map fetchOpJobConfMap = new HashMap(); - // create map local operators - for (Map.Entry entry : localWork.getAliasToFetchWork() - .entrySet()) { - JobConf jobClone = new JobConf(job); - Operator tableScan = localWork.getAliasToWork() - .get(entry.getKey()); - boolean setColumnsNeeded = false; - if(tableScan instanceof TableScanOperator) { - ArrayList list = ((TableScanOperator)tableScan).getNeededColumnIDs(); - if (list != null) { - ColumnProjectionUtils.appendReadColumnIDs(jobClone, list); - setColumnsNeeded = true; - } - } - - if (!setColumnsNeeded) { - ColumnProjectionUtils.setFullyReadColumns(jobClone); - } - FetchOperator fetchOp = new FetchOperator(entry.getValue(),jobClone); - fetchOpJobConfMap.put(fetchOp, jobClone); - fetchOperators.put(entry.getKey(), fetchOp); - l4j.info("fetchoperator for " + entry.getKey() + " created"); + //The following code is for mapjoin + //initialize all the dummy ops + l4j.info("Initializing dummy operator"); + List> dummyOps = localWork.getDummyParentOp(); + for(Operator dummyOp : dummyOps){ + dummyOp.setExecContext(execContext); + dummyOp.initialize(jc,null); } - // initialize map local operators - for (Map.Entry entry : fetchOperators.entrySet()) { - Operator forwardOp = localWork.getAliasToWork() - .get(entry.getKey()); - forwardOp.setExecContext(execContext); - // All the operators need to be initialized before process - FetchOperator fetchOp = entry.getValue(); - JobConf jobConf = fetchOpJobConfMap.get(fetchOp); - if (jobConf == null) { - jobConf = job; - } - forwardOp.initialize(jobConf, new ObjectInspector[] {fetchOp - .getOutputObjectInspector()}); - l4j.info("fetchoperator for " + entry.getKey() + " initialized"); - } - this.execContext.setLocalWork(localWork); - this.execContext.setFetchOperators(fetchOperators); - // defer processing of map local operators to first row if in case there - // is no input (??) 
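With the FetchOperator plumbing moved out of the mapper, ExecMapper's local-work setup reduces to initializing the dummy parent operators added above. With the generic type parameters that the diff lost restored, the added loop reads:

    // Same code as the added lines above, with the lost type parameters restored.
    List<Operator<? extends Serializable>> dummyOps = localWork.getDummyParentOp();
    for (Operator<? extends Serializable> dummyOp : dummyOps) {
      dummyOp.setExecContext(execContext);
      // no parent ObjectInspectors are passed: a JDBMDummyOperator derives its
      // output inspector from its TableDesc during initializeOp()
      dummyOp.initialize(jc, null);
    }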
+ } catch (Throwable e) { abort = true; if (e instanceof OutOfMemoryError) { @@ -228,6 +189,16 @@ // ideally hadoop should let us know whether map execution failed or not try { mo.close(abort); + + //for close the local work + if(localWork != null){ + List> dummyOps = localWork.getDummyParentOp(); + + for(Operator dummyOp : dummyOps){ + dummyOp.close(abort); + } + } + if (fetchOperators != null) { MapredLocalWork localWork = mo.getConf().getMapLocalWork(); for (Map.Entry entry : fetchOperators.entrySet()) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecMapperContext.java (working copy) @@ -1,6 +1,5 @@ package org.apache.hadoop.hive.ql.exec; -import java.io.Serializable; import java.util.Iterator; import java.util.List; import java.util.Map; @@ -9,10 +8,8 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.IOContext; -import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext; -import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.util.ReflectionUtils; @@ -35,17 +32,21 @@ private JobConf jc; private IOContext ioCxt; - + + private String currentBigBucketFile=null; + + public String getCurrentBigBucketFile() { + return currentBigBucketFile; + } + + public void setCurrentBigBucketFile(String currentBigBucketFile) { + this.currentBigBucketFile = currentBigBucketFile; + } + public ExecMapperContext() { ioCxt = IOContext.get(); } - public void processInputFileChangeForLocalWork() throws HiveException { - // put inputFileChanged() after localWork check - if (this.localWork != null && inputFileChanged()) { - processMapLocalWork(localWork.getInputFileChangeSensitive()); - } - } /** @@ -85,57 +86,6 @@ this.lastInputFile = lastInputFile; } - private void processMapLocalWork(boolean inputFileChangeSenstive) throws HiveException { - // process map local operators - if (fetchOperators != null) { - try { - int fetchOpNum = 0; - for (Map.Entry entry : fetchOperators.entrySet()) { - int fetchOpRows = 0; - String alias = entry.getKey(); - FetchOperator fetchOp = entry.getValue(); - - if (inputFileChangeSenstive) { - fetchOp.clearFetchContext(); - setUpFetchOpContext(fetchOp, alias); - } - - Operator forwardOp = localWork - .getAliasToWork().get(alias); - - while (true) { - InspectableObject row = fetchOp.getNextRow(); - if (row == null) { - forwardOp.close(false); - break; - } - fetchOpRows++; - forwardOp.process(row.o, 0); - // check if any operator had a fatal error or early exit during - // execution - if (forwardOp.getDone()) { - ExecMapper.setDone(true); - break; - } - } - - if (l4j.isInfoEnabled()) { - l4j.info("fetch " + fetchOpNum++ + " processed " + fetchOpRows - + " used mem: " - + ExecMapper.memoryMXBean.getHeapMemoryUsage().getUsed()); - } - } - } catch (Throwable e) { - if (e instanceof OutOfMemoryError) { - // Don't create a new object if we are already out of memory - throw (OutOfMemoryError) e; - } else { - throw new HiveException( - "Hive Runtime Error: Map local work failed", e); - } - } - } - } private void setUpFetchOpContext(FetchOperator fetchOp, String alias) throws Exception { @@ 
-194,7 +144,7 @@ public void setFetchOperators(Map fetchOperators) { this.fetchOperators = fetchOperators; } - + public IOContext getIoCxt() { return ioCxt; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/GroupByOperator.java (working copy) @@ -43,9 +43,9 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; -import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.Text; @@ -164,7 +164,7 @@ HiveConf.ConfVars.HIVESENDHEARTBEAT); countAfterReport = 0; - assert (inputObjInspectors.length == 1); + //assert (inputObjInspectors.length == 1); ObjectInspector rowInspector = inputObjInspectors[0]; // init keyFields Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMDummyOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMDummyOperator.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMDummyOperator.java (revision 0) @@ -0,0 +1,66 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
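The deletions above (processInputFileChangeForLocalWork and processMapLocalWork) move all small-table scanning out of the map task: the small tables are now materialized once by the new MapredLocalTask at the end of this patch, and the map side only tracks currentBigBucketFile so bucketed joins can pick the matching spill file. The per-row hook that replaces the removed code lives in MapJoinOperator.processOp(), shown later in this patch; condensed:

    // Condensed from MapJoinOperator.processOp() below.
    if (firstRow) {
      generateMapMetaData();   // register key/value serde contexts
      firstRow = false;
    }
    if (this.getExecContext().inputFileChanged()) {
      loadJDBM();              // reload hash tables from the matching .jdbm files
    }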
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.Serializable;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.JDBMDummyDesc;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.serde2.Deserializer;
+
+public class JDBMDummyOperator extends Operator<JDBMDummyDesc> implements Serializable {
+
+  @Override
+  protected void initializeOp(Configuration hconf) throws HiveException {
+    TableDesc tbl = this.getConf().getTbl();
+    try {
+      Deserializer serde = tbl.getDeserializerClass().newInstance();
+      serde.initialize(hconf, tbl.getProperties());
+
+      this.outputObjInspector = serde.getObjectInspector();
+      initializeChildren(hconf);
+    } catch (Exception e) {
+      LOG.error("Generating output obj inspector from dummy object error");
+      e.printStackTrace();
+    }
+  }
+
+  @Override
+  public void processOp(Object row, int tag) throws HiveException {
+    throw new HiveException();
+  }
+
+  @Override
+  public void closeOp(boolean abort) throws HiveException {
+  }
+
+  @Override
+  public String getName() {
+    return "JDBMDUMMY";
+  }
+
+  @Override
+  public int getType() {
+    return OperatorType.JDBMDUMMY;
+  }
+
+}

Property changes on: ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMDummyOperator.java
___________________________________________________________________
Added: svn:executable
   + *

Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMSinkOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMSinkOperator.java (revision 0)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMSinkOperator.java (revision 0)
@@ -0,0 +1,484 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
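JDBMDummyOperator above is a pure placeholder for a small-table branch in the map-side plan: processOp() must never be called (it throws), and initializeOp() only publishes the ObjectInspector obtained from the configured table's deserializer so downstream operators can initialize normally. The plan rewrite that inserts it is not part of this excerpt; a hypothetical wiring sketch, in which the JDBMDummyDesc constructor and setter are assumptions (only getTbl() appears above), would be:

    // Hypothetical wiring; the marked names are assumptions, not patch code.
    JDBMDummyOperator dummy = new JDBMDummyOperator();
    JDBMDummyDesc desc = new JDBMDummyDesc();   // assumed no-arg constructor
    desc.setTbl(smallTableValueDesc);           // assumed setter paired with getTbl()
    dummy.setConf(desc);
    // one dummy parent per small-table tag keeps MapJoinOperator's parent list intact
    List<Operator<? extends Serializable>> children =
        new ArrayList<Operator<? extends Serializable>>();
    children.add(mapJoinOp);
    dummy.setChildOperators(children);
    mapJoinOp.getParentOperators().set(smallTableTag, dummy);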
+ */ +package org.apache.hadoop.hive.ql.exec; + +import java.io.IOException; +import java.io.Serializable; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectKey; +import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectValue; +import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.JDBMSinkDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.util.JoinUtil; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; +import org.apache.hadoop.util.ReflectionUtils; + + +public class JDBMSinkOperator extends TerminalOperator +implements Serializable { + private static final long serialVersionUID = 1L; + private static final Log LOG = LogFactory.getLog(JDBMSinkOperator.class + .getName()); + + //from abstract map join operator + /** + * The expressions for join inputs's join keys. + */ + protected transient Map> joinKeys; + /** + * The ObjectInspectors for the join inputs's join keys. + */ + protected transient Map> joinKeysObjectInspectors; + /** + * The standard ObjectInspectors for the join inputs's join keys. + */ + protected transient Map> joinKeysStandardObjectInspectors; + + protected transient int posBigTableTag = -1; // one of the tables that is not in memory + protected transient int posBigTableAlias = -1; // one of the tables that is not in memory + transient int mapJoinRowsKey; // rows for a given key + + protected transient RowContainer> emptyList = null; + + transient int numMapRowsRead; + protected transient int totalSz; // total size of the composite object + transient boolean firstRow; + private boolean smallTablesOnly; + /** + * The filters for join + */ + protected transient Map> joinFilters; + + protected transient int numAliases; // number of aliases + /** + * The expressions for join outputs. + */ + protected transient Map> joinValues; + /** + * The ObjectInspectors for the join inputs. + */ + protected transient Map> joinValuesObjectInspectors; + /** + * The ObjectInspectors for join filters. + */ + protected transient Map> joinFilterObjectInspectors; + /** + * The standard ObjectInspectors for the join inputs. 
+ */ + protected transient Map> joinValuesStandardObjectInspectors; + + protected transient + Map> rowContainerStandardObjectInspectors; + + protected transient Byte[] order; // order in which the results should + Configuration hconf; + protected transient Byte alias; + protected transient Map spillTableDesc; // spill tables are + + protected transient Map> mapJoinTables; + protected transient boolean noOuterJoin; + + public static class JDBMSinkObjectCtx { + ObjectInspector standardOI; + SerDe serde; + TableDesc tblDesc; + Configuration conf; + + /** + * @param standardOI + * @param serde + */ + public JDBMSinkObjectCtx(ObjectInspector standardOI, SerDe serde, + TableDesc tblDesc, Configuration conf) { + this.standardOI = standardOI; + this.serde = serde; + this.tblDesc = tblDesc; + this.conf = conf; + } + + /** + * @return the standardOI + */ + public ObjectInspector getStandardOI() { + return standardOI; + } + + /** + * @return the serde + */ + public SerDe getSerDe() { + return serde; + } + + public TableDesc getTblDesc() { + return tblDesc; + } + + public Configuration getConf() { + return conf; + } + + } + + private static final transient String[] FATAL_ERR_MSG = { + null, // counter value 0 means no error + "Mapside join size exceeds hive.mapjoin.maxsize. " + + "Please increase that or remove the mapjoin hint." + }; + transient int metadataKeyTag; + transient int[] metadataValueTag; + transient int maxMapJoinSize; + + + public JDBMSinkOperator(){ + //super(); + } + + public JDBMSinkOperator(MapJoinOperator mjop){ + this.conf = new JDBMSinkDesc(mjop.getConf()); + } + + + @Override + protected void initializeOp(Configuration hconf) throws HiveException { + + maxMapJoinSize = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEMAXMAPJOINSIZE); + + numMapRowsRead = 0; + firstRow = true; + + //for small tables only; so get the big table position first + posBigTableTag = conf.getPosBigTable(); + + order = conf.getTagOrder(); + + posBigTableAlias=order[posBigTableTag]; + + //initialize some variables, which used to be initialized in CommonJoinOperator + numAliases = conf.getExprs().size(); + this.hconf = hconf; + totalSz = 0; + + noOuterJoin = conf.isNoOuterJoin(); + + //process join keys + joinKeys = new HashMap>(); + JoinUtil.populateJoinKeyValue(joinKeys, conf.getKeys(),order,posBigTableAlias); + joinKeysObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinKeys,inputObjInspectors,posBigTableAlias); + joinKeysStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(joinKeysObjectInspectors,posBigTableAlias); + + //process join values + joinValues = new HashMap>(); + JoinUtil.populateJoinKeyValue(joinValues, conf.getExprs(),order,posBigTableAlias); + joinValuesObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues,inputObjInspectors,posBigTableAlias); + joinValuesStandardObjectInspectors = JoinUtil.getStandardObjectInspectors(joinValuesObjectInspectors,posBigTableAlias); + + //process join filters + joinFilters = new HashMap>(); + JoinUtil.populateJoinKeyValue(joinFilters, conf.getFilters(),order,posBigTableAlias); + joinFilterObjectInspectors = JoinUtil.getObjectInspectorsFromEvaluators(joinValues,inputObjInspectors,posBigTableAlias); + + + + + if (noOuterJoin) { + rowContainerStandardObjectInspectors = joinValuesStandardObjectInspectors; + } else { + Map> rowContainerObjectInspectors = + new HashMap>(); + for (Byte alias : order) { + if(alias == posBigTableAlias){ + continue; + } + ArrayList rcOIs = new ArrayList(); + 
rcOIs.addAll(joinValuesObjectInspectors.get(alias)); + // for each alias, add object inspector for boolean as the last element + rcOIs.add( + PrimitiveObjectInspectorFactory.writableBooleanObjectInspector); + rowContainerObjectInspectors.put(alias, rcOIs); + } + rowContainerStandardObjectInspectors = + getStandardObjectInspectors(rowContainerObjectInspectors); + } + + metadataValueTag = new int[numAliases]; + for (int pos = 0; pos < numAliases; pos++) { + metadataValueTag[pos] = -1; + } + + mapJoinTables = new HashMap>(); + + // initialize the hash tables for other tables + for (Byte pos:order) { + if (pos == posBigTableTag) { + continue; + } + + int cacheSize = HiveConf.getIntVar(hconf, + HiveConf.ConfVars.HIVEMAPJOINCACHEROWS); + HashMapWrapper hashTable = new HashMapWrapper( + cacheSize); + + mapJoinTables.put(pos, hashTable); + } + } + + + + protected static HashMap> getStandardObjectInspectors( + Map> aliasToObjectInspectors) { + HashMap> result = new HashMap>(); + for (Entry> oiEntry : aliasToObjectInspectors + .entrySet()) { + Byte alias = oiEntry.getKey(); + List oiList = oiEntry.getValue(); + ArrayList fieldOIList = new ArrayList( + oiList.size()); + for (int i = 0; i < oiList.size(); i++) { + fieldOIList.add(ObjectInspectorUtils.getStandardObjectInspector(oiList + .get(i), ObjectInspectorCopyOption.WRITABLE)); + } + result.put(alias, fieldOIList); + } + return result; + + } + + /* + * This operator only process small tables + * Read the key/value pairs + * Load them into hashtable + */ + @Override + public void processOp(Object row, int tag) throws HiveException{ + //let the mapJoinOp process these small tables + try{ + alias = order[tag]; + //alias = (byte)tag; + + // compute keys and values as StandardObjects + ArrayList key = JoinUtil.computeKeys(row, joinKeys.get(alias), + joinKeysObjectInspectors.get(alias)); + + ArrayList value = JoinUtil.computeValues(row, joinValues.get(alias), + joinValuesObjectInspectors.get(alias),joinFilters.get(alias), + joinFilterObjectInspectors.get(alias), noOuterJoin); + + + if (firstRow) { + metadataKeyTag = -1; + + TableDesc keyTableDesc = conf.getKeyTblDesc(); + SerDe keySerializer = (SerDe) ReflectionUtils.newInstance( + keyTableDesc.getDeserializerClass(), null); + keySerializer.initialize(null, keyTableDesc.getProperties()); + + MapJoinMetaData.clear(); + MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), + new JDBMSinkObjectCtx( + ObjectInspectorUtils + .getStandardObjectInspector(keySerializer + .getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE), keySerializer, + keyTableDesc, hconf)); + + firstRow = false; + } + + numMapRowsRead++; + + if ((numMapRowsRead > maxMapJoinSize)&& (counterNameToEnum != null)) { + // update counter + LOG + .warn("Too many rows in map join tables. 
Fatal error counter will be incremented!!"); + incrCounter(fatalErrorCntr, 1); + fatalError = true; + return; + } + + HashMapWrapper hashTable = mapJoinTables.get((byte) tag); + MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); + MapJoinObjectValue o = hashTable.get(keyMap); + RowContainer res = null; + + boolean needNewKey = true; + if (o == null) { + int bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE); + res = JoinUtil.getRowContainer(hconf, + rowContainerStandardObjectInspectors.get((byte)tag), + order[tag], bucketSize,spillTableDesc,conf,noOuterJoin); + + res.add(value); + } else { + res = o.getObj(); + res.add(value); + + if (hashTable.cacheSize() > 0) { + o.setObj(res); + needNewKey = false; + } + } + + if (metadataValueTag[tag] == -1) { + metadataValueTag[tag] = order[tag]; + + TableDesc valueTableDesc = conf.getValueTblDescs().get(tag); + SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc + .getDeserializerClass(), null); + valueSerDe.initialize(null, valueTableDesc.getProperties()); + + MapJoinMetaData.put(Integer.valueOf(metadataValueTag[tag]), + new JDBMSinkObjectCtx(ObjectInspectorUtils + .getStandardObjectInspector(valueSerDe.getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE), valueSerDe, + valueTableDesc, hconf)); + } + + // Construct externalizable objects for key and value + if (needNewKey) { + MapJoinObjectKey keyObj = new MapJoinObjectKey(metadataKeyTag, key); + MapJoinObjectValue valueObj = new MapJoinObjectValue( + metadataValueTag[tag], res); + + //valueObj.setConf(hconf); + valueObj.setConf(hconf); + // This may potentially increase the size of the hashmap on the mapper + if (res.size() > mapJoinRowsKey) { + if (res.size() % 100 == 0) { + LOG.warn("Number of values for a given key " + keyObj + " are " + + res.size()); + LOG.warn("used memory " + Runtime.getRuntime().totalMemory()); + } + } + hashTable.put(keyObj, valueObj); + } + }catch (SerDeException e) { + e.printStackTrace(); + throw new HiveException(e); + } + + } + + + @Override + /* + * Flush the hashtable into jdbm file + * Load this jdbm file into HDFS only + */ + public void closeOp(boolean abort) throws HiveException{ + try{ + if(mapJoinTables != null) { + //get tmp file URI + String tmpURI = this.getExecContext().getLocalWork().getTmpFileURI(); + LOG.info("Get TMP URI: "+tmpURI); + + for (Map.Entry> hashTables : mapJoinTables.entrySet()) { + //get the key and value + Byte tag = hashTables.getKey(); + HashMapWrapper hashTable = hashTables.getValue(); + + //get the jdbm file and path + String jdbmFile = hashTable.flushMemoryCacheToPersistent(); + Path localPath = new Path(jdbmFile); + + //get current input file name + String bigBucketFileName = this.getExecContext().getCurrentBigBucketFile(); + if(bigBucketFileName == null ||bigBucketFileName.length()==0) { + bigBucketFileName="-"; + } + //get the tmp URI path; it will be a hdfs path if not local mode + Path tmpURIPath = new Path(tmpURI+Path.SEPARATOR+"-"+tag+"-"+bigBucketFileName+".jdbm"); + + //upload jdbm file to this HDFS + FileSystem fs = tmpURIPath.getFileSystem(this.getExecContext().getJc()); + fs.copyFromLocalFile(localPath, tmpURIPath); + LOG.info("Upload 1 JDBM File to: "+tmpURIPath); + System.out.println("Upload 1 JDBM File to: "+tmpURIPath); + //remove the original jdbm tmp file + hashTable.close(); + } + } + + super.closeOp(abort); + }catch(IOException e){ + LOG.error("Copy local file to HDFS error"); + e.printStackTrace(); + } + } + + /** + * Implements the 
getName function for the Node Interface. + * + * @return the name of the operator + */ + @Override + public String getName() { + return "JDBMSINK"; + } + + @Override + public int getType() { + return OperatorType.JDBMSINK; + } + + private void getPersistentFilePath(Map paths) throws HiveException{ + Map jdbmFilePaths = paths; + try{ + if(mapJoinTables != null) { + for (Map.Entry> hashTables : mapJoinTables.entrySet()) { + //hashTable.close(); + + Byte key = hashTables.getKey(); + HashMapWrapper hashTable = hashTables.getValue(); + + //get the jdbm file and path + String jdbmFile = hashTable.flushMemoryCacheToPersistent(); + Path localPath = new Path(jdbmFile); + + //insert into map + jdbmFilePaths.put(key, localPath); + } + } + }catch (Exception e){ + LOG.fatal("Get local JDBM file error"); + e.printStackTrace(); + } + } + +} Property changes on: ql/src/java/org/apache/hadoop/hive/ql/exec/JDBMSinkOperator.java ___________________________________________________________________ Added: svn:executable + * Index: ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/JoinOperator.java (working copy) @@ -29,6 +29,7 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.util.JoinUtil; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.serde2.SerDeUtils; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -78,10 +79,12 @@ nextSz = joinEmitInterval; } - ArrayList nr = computeValues(row, joinValues.get(alias), + + ArrayList nr = JoinUtil.computeValues(row, joinValues.get(alias), joinValuesObjectInspectors.get(alias), joinFilters.get(alias), joinFilterObjectInspectors.get(alias), noOuterJoin); + if (handleSkewJoin) { skewJoinKeyContext.handleSkew(tag); } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java (revision 0) @@ -0,0 +1,42 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
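Two conventions established above are worth spelling out before the MapJoinMetaData helper that follows. First, the file-name contract between JDBMSinkOperator.closeOp() and MapJoinOperator.loadJDBM(): the sink names each spill file by its tag and the current big-table bucket, and the map side finds the file again by the same suffix, whether under the tmp URI in local mode or in the distributed cache otherwise:

    // Shared naming scheme, as used by closeOp() above and loadJDBM() below.
    String bucket = (bigBucketFileName == null || bigBucketFileName.length() == 0)
        ? "-" : bigBucketFileName;
    Path uploaded = new Path(tmpURI + Path.SEPARATOR + "-" + tag + "-" + bucket + ".jdbm");
    String suffix = "-" + tag + "-" + bucket + ".jdbm";   // loadJDBM() matches on this

Second, MapJoinMetaData keys its serialization contexts by tag: -1 (metadataKeyTag) for the join key, and the small table's alias for each value side. The map is static, so only one map join's metadata can be live per task JVM, which is why JDBMSinkOperator calls MapJoinMetaData.clear() on the first row.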
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.hadoop.hive.ql.exec.JDBMSinkOperator.JDBMSinkObjectCtx;
+
+public class MapJoinMetaData {
+  static transient Map<Integer, JDBMSinkObjectCtx> mapMetadata =
+      new HashMap<Integer, JDBMSinkObjectCtx>();
+
+  public MapJoinMetaData() {
+
+  }
+
+  public static void put(Integer key, JDBMSinkObjectCtx value) {
+    mapMetadata.put(key, value);
+  }
+
+  public static JDBMSinkObjectCtx get(Integer key) {
+    return mapMetadata.get(key);
+  }
+
+  public static void clear() {
+    mapMetadata.clear();
+  }
+
+}

Property changes on: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinMetaData.java
___________________________________________________________________
Added: svn:executable
   + *

Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (revision 1023076)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapJoinOperator.java (working copy)
@@ -17,8 +17,9 @@
  */
 package org.apache.hadoop.hive.ql.exec;
-
+import java.io.File;
 import java.io.Serializable;
+import java.net.URI;
 import java.util.ArrayList;
 import java.util.HashMap;
 import java.util.Map;
 
@@ -26,18 +27,20 @@
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.filecache.DistributedCache;
+import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.JDBMSinkOperator.JDBMSinkObjectCtx;
 import org.apache.hadoop.hive.ql.exec.persistence.HashMapWrapper;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectKey;
 import org.apache.hadoop.hive.ql.exec.persistence.MapJoinObjectValue;
-import org.apache.hadoop.hive.ql.exec.persistence.RowContainer;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
 import org.apache.hadoop.hive.ql.plan.MapJoinDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.api.OperatorType;
+import org.apache.hadoop.hive.ql.util.JoinUtil;
 import org.apache.hadoop.hive.serde2.SerDe;
 import org.apache.hadoop.hive.serde2.SerDeException;
-import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption;
 import org.apache.hadoop.util.ReflectionUtils;
@@ -51,6 +54,7 @@
   private static final Log LOG = LogFactory.getLog(MapJoinOperator.class
       .getName());
 
+
   protected transient Map<Byte, HashMapWrapper<MapJoinObjectKey, MapJoinObjectValue>> mapJoinTables;
 
   private static final transient String[] FATAL_ERR_MSG = {
@@ -59,63 +63,13 @@
       + "Please increase that or remove the mapjoin hint."
   };
 
-  /**
-   * MapJoinObjectCtx.
- * - */ - public static class MapJoinObjectCtx { - ObjectInspector standardOI; - SerDe serde; - TableDesc tblDesc; - Configuration conf; - - /** - * @param standardOI - * @param serde - */ - public MapJoinObjectCtx(ObjectInspector standardOI, SerDe serde, - TableDesc tblDesc, Configuration conf) { - this.standardOI = standardOI; - this.serde = serde; - this.tblDesc = tblDesc; - this.conf = conf; - } - /** - * @return the standardOI - */ - public ObjectInspector getStandardOI() { - return standardOI; - } - /** - * @return the serde - */ - public SerDe getSerDe() { - return serde; - } - - public TableDesc getTblDesc() { - return tblDesc; - } - - public Configuration getConf() { - return conf; - } - - } - - static transient Map mapMetadata = new HashMap(); - static transient int nextVal = 0; - - public static Map getMapMetadata() { - return mapMetadata; - } transient int metadataKeyTag; transient int[] metadataValueTag; transient int maxMapJoinSize; - + private int bigTableAlias; public MapJoinOperator() { } @@ -125,6 +79,7 @@ @Override protected void initializeOp(Configuration hconf) throws HiveException { + super.initializeOp(hconf); maxMapJoinSize = HiveConf.getIntVar(hconf, @@ -135,6 +90,9 @@ metadataValueTag[pos] = -1; } + metadataKeyTag = -1; + bigTableAlias = order[posBigTable]; + mapJoinTables = new HashMap>(); // initialize the hash tables for other tables @@ -150,138 +108,172 @@ mapJoinTables.put(Byte.valueOf((byte) pos), hashTable); } + } + @Override protected void fatalErrorMessage(StringBuilder errMsg, long counterCode) { errMsg.append("Operator " + getOperatorId() + " (id=" + id + "): " + FATAL_ERR_MSG[(int) counterCode]); } - @Override - public void processOp(Object row, int tag) throws HiveException { - if (tag == posBigTable) { - this.getExecContext().processInputFileChangeForLocalWork(); - } + public void generateMapMetaData() throws HiveException,SerDeException{ + //generate the meta data for key + //index for key is -1 + TableDesc keyTableDesc = conf.getKeyTblDesc(); + SerDe keySerializer = (SerDe) ReflectionUtils.newInstance( + keyTableDesc.getDeserializerClass(), null); + keySerializer.initialize(null, keyTableDesc.getProperties()); + MapJoinMetaData.put(Integer.valueOf(metadataKeyTag), + new JDBMSinkObjectCtx( + ObjectInspectorUtils + .getStandardObjectInspector(keySerializer + .getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE), keySerializer, + keyTableDesc, hconf)); - try { - // get alias - alias = (byte) tag; + //index for values is just alias + for (int tag = 0; tag < order.length; tag++) { + int alias = (int) order[tag]; - if ((lastAlias == null) || (!lastAlias.equals(alias))) { - nextSz = joinEmitInterval; + if(alias == this.bigTableAlias){ + continue; } - // compute keys and values as StandardObjects - ArrayList key = computeKeys(row, joinKeys.get(alias), - joinKeysObjectInspectors.get(alias)); - ArrayList value = computeValues(row, joinValues.get(alias), - joinValuesObjectInspectors.get(alias), joinFilters.get(alias), - joinFilterObjectInspectors.get(alias), noOuterJoin); + + TableDesc valueTableDesc = conf.getValueTblDescs().get(tag); + SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc + .getDeserializerClass(), null); + valueSerDe.initialize(null, valueTableDesc.getProperties()); + + MapJoinMetaData.put(Integer.valueOf(alias), + new JDBMSinkObjectCtx(ObjectInspectorUtils + .getStandardObjectInspector(valueSerDe.getObjectInspector(), + ObjectInspectorCopyOption.WRITABLE), valueSerDe, + valueTableDesc, hconf)); + } + } - // does 
this source need to be stored in the hash map - if (tag != posBigTable) { - if (firstRow) { - metadataKeyTag = nextVal++; + private void loadJDBM() throws HiveException{ + boolean localMode = HiveConf.getVar(hconf, HiveConf.ConfVars.HADOOPJT).equals("local"); + String tmpURI =null; + HashMapWrapper hashtable; + Byte pos; + int alias; - TableDesc keyTableDesc = conf.getKeyTblDesc(); - SerDe keySerializer = (SerDe) ReflectionUtils.newInstance( - keyTableDesc.getDeserializerClass(), null); - keySerializer.initialize(null, keyTableDesc.getProperties()); + String currentInputFile = HiveConf.getVar(hconf, + HiveConf.ConfVars.HADOOPMAPFILENAME); + LOG.info("******* Load from JDBM File: input : "+ currentInputFile); - mapMetadata.put(Integer.valueOf(metadataKeyTag), - new MapJoinObjectCtx( - ObjectInspectorUtils - .getStandardObjectInspector(keySerializer - .getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE), keySerializer, - keyTableDesc, hconf)); + String currentFileName; - firstRow = false; + if(this.getExecContext().getLocalWork().getInputFileChangeSensitive()) { + currentFileName= this.getFileName(currentInputFile); + } else { + currentFileName="-"; + } + LOG.info("******* Filename : "+ currentFileName); + try{ + if(localMode){ + //load the jdbm file from tmp dir + LOG.info("******* Load from tmp file uri ***"); + tmpURI= this.getExecContext().getLocalWork().getTmpFileURI(); + for(Map.Entry> entry: mapJoinTables.entrySet()){ + pos = entry.getKey(); + hashtable=entry.getValue(); + URI uri = new URI(tmpURI+Path.SEPARATOR+"-"+pos+"-"+currentFileName+".jdbm"); + LOG.info("\tLoad back 1 JDBM file from tmp file uri:"+uri.toString()); + Path path = new Path(tmpURI+Path.SEPARATOR+"-"+pos+"-"+currentFileName+".jdbm"); + LOG.info("\tLoad back 1 JDBM file from tmp file uri:"+path.toString()); + + File jdbmFile = new File(path.toUri()); + hashtable.initilizePersistentHash(jdbmFile); } + }else{ + //load the jdbm file from distributed cache + LOG.info("******* Load from distributed Cache ***:"); + Path[] localFiles= DistributedCache.getLocalCacheFiles(this.hconf); + for(int i = 0;i maxMapJoinSize) && (reporter != null) - && (counterNameToEnum != null)) { - // update counter - LOG - .warn("Too many rows in map join tables. 
Fatal error counter will be incremented!!"); - incrCounter(fatalErrorCntr, 1); - fatalError = true; - return; - } + for(Map.Entry> entry: mapJoinTables.entrySet()){ + pos = entry.getKey(); + hashtable=entry.getValue(); + String suffix="-"+pos+"-"+currentFileName+".jdbm"; + LOG.info("Looking for jdbm file with suffix: "+suffix); + + boolean found=false; + for(int i = 0;i hashTable = mapJoinTables - .get(alias); - MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); - MapJoinObjectValue o = hashTable.get(keyMap); - RowContainer res = null; + if(path.toString().endsWith(suffix)){ + LOG.info("Matching suffix with cached file:"+path.toString()); + File jdbmFile = new File(path.toString()); + LOG.info("\tInitializing the JDBM by cached file:"+path.toString()); + hashtable.initilizePersistentHash(jdbmFile); + found = true; + LOG.info("\tLoad back 1 JDBM file from distributed cache:"+path.toString()); + break; + } + } + if(!found){ + LOG.error("Load nothing from Distributed Cache"); + throw new HiveException(); + } + } + LOG.info("******* End of loading *******:"); - boolean needNewKey = true; - if (o == null) { - int bucketSize = HiveConf.getIntVar(hconf, HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE); - res = getRowContainer(hconf, (byte) tag, order[tag], bucketSize); - res.add(value); - } else { - res = o.getObj(); - res.add(value); - // If key already exists, HashMapWrapper.get() guarantees it is - // already in main memory HashMap - // cache. So just replacing the object value should update the - // HashMapWrapper. This will save - // the cost of constructing the new key/object and deleting old one - // and inserting the new one. - if (hashTable.cacheSize() > 0) { - o.setObj(res); - needNewKey = false; - } - } + } + }catch (Exception e){ + e.printStackTrace(); + LOG.error("Load Hash Table error"); + + throw new HiveException(); + } - if (metadataValueTag[tag] == -1) { - metadataValueTag[tag] = nextVal++; - TableDesc valueTableDesc = conf.getValueTblDescs().get(tag); - SerDe valueSerDe = (SerDe) ReflectionUtils.newInstance(valueTableDesc - .getDeserializerClass(), null); - valueSerDe.initialize(null, valueTableDesc.getProperties()); + } - mapMetadata.put(Integer.valueOf(metadataValueTag[tag]), - new MapJoinObjectCtx(ObjectInspectorUtils - .getStandardObjectInspector(valueSerDe.getObjectInspector(), - ObjectInspectorCopyOption.WRITABLE), valueSerDe, - valueTableDesc, hconf)); - } + @Override + public void processOp(Object row, int tag) throws HiveException { + + try { + if(firstRow){ + //generate the map metadata + generateMapMetaData(); + firstRow = false; + } + if(this.getExecContext().inputFileChanged()){ + loadJDBM(); + } + + // get alias + alias = order[tag]; + //alias = (byte)tag; - // Construct externalizable objects for key and value - if (needNewKey) { - MapJoinObjectKey keyObj = new MapJoinObjectKey(metadataKeyTag, key); - MapJoinObjectValue valueObj = new MapJoinObjectValue( - metadataValueTag[tag], res); - valueObj.setConf(hconf); - valueObj.setConf(hconf); - // This may potentially increase the size of the hashmap on the mapper - if (res.size() > mapJoinRowsKey) { - if (res.size() % 100 == 0) { - LOG.warn("Number of values for a given key " + keyObj + " are " - + res.size()); - LOG.warn("used memory " + Runtime.getRuntime().totalMemory()); - } - } - hashTable.put(keyObj, valueObj); - } - return; + if ((lastAlias == null) || (!lastAlias.equals(alias))) { + nextSz = joinEmitInterval; } + // compute keys and values as StandardObjects + ArrayList key = 
JoinUtil.computeKeys(row, joinKeys.get(alias), + joinKeysObjectInspectors.get(alias)); + ArrayList value = JoinUtil.computeValues(row, joinValues.get(alias), + joinValuesObjectInspectors.get(alias), joinFilters.get(alias), + joinFilterObjectInspectors.get(alias), noOuterJoin); + + // Add the value to the ArrayList - storage.get(alias).add(value); + storage.get((byte)tag).add(value); for (Byte pos : order) { if (pos.intValue() != tag) { MapJoinObjectKey keyMap = new MapJoinObjectKey(metadataKeyTag, key); - MapJoinObjectValue o = mapJoinTables.get(pos).get(keyMap); + MapJoinObjectValue o = mapJoinTables.get(pos).getMapJoinValueObject(keyMap); // there is no join-value or join-key has all null elements if (o == null || (hasAnyNulls(key))) { @@ -300,7 +292,7 @@ checkAndGenObject(); // done with the row - storage.get(alias).clear(); + storage.get((byte)tag).clear(); for (Byte pos : order) { if (pos.intValue() != tag) { @@ -313,9 +305,19 @@ throw new HiveException(e); } } + private String getFileName(String path){ + if(path== null || path.length()==0) { + return null; + } + + int last_separator = path.lastIndexOf(Path.SEPARATOR)+1; + String fileName = path.substring(last_separator); + return fileName; + } @Override public void closeOp(boolean abort) throws HiveException { + if(mapJoinTables != null) { for (HashMapWrapper hashTable : mapJoinTables.values()) { hashTable.close(); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapRedTask.java (working copy) @@ -18,8 +18,8 @@ package org.apache.hadoop.hive.ql.exec; +import java.io.File; import java.io.IOException; -import java.io.File; import java.io.OutputStream; import java.io.Serializable; import java.util.HashMap; @@ -173,15 +173,17 @@ workDir = (new Path(ctx.getLocalTmpFileURI())).toUri().getPath(); - if (! (new File(workDir)).mkdir()) + if (! (new File(workDir)).mkdir()) { throw new IOException ("Cannot create tmp working dir: " + workDir); + } for (String f: StringUtils.split(files, ',')) { Path p = new Path(f); String target = p.toUri().getPath(); String link = workDir + Path.SEPARATOR + p.getName(); - if (FileUtil.symLink(target, link) != 0) + if (FileUtil.symLink(target, link) != 0) { throw new IOException ("Cannot link to added file: " + target + " from: " + link); + } } } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java (revision 0) @@ -0,0 +1,235 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.exec;
+
+import java.io.File;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.DriverContext;
+import org.apache.hadoop.hive.ql.QueryPlan;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.plan.FetchWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext;
+import org.apache.hadoop.hive.ql.plan.api.StageType;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.util.ReflectionUtils;
+
+public class MapredLocalTask extends Task<MapredLocalWork> implements Serializable {
+
+  private Map<String, FetchOperator> fetchOperators;
+  private File jdbmFile;
+  private JobConf job;
+  public static final Log l4j = LogFactory.getLog("MapredLocalTask");
+  private MapOperator mo;
+  // not sure we need this exec context, but all the operators in the work
+  // will pass this context through
+  private final ExecMapperContext execContext = new ExecMapperContext();
+
+  public MapredLocalTask() {
+    super();
+  }
+
+  @Override
+  public void initialize(HiveConf conf, QueryPlan queryPlan,
+      DriverContext driverContext) {
+    super.initialize(conf, queryPlan, driverContext);
+    job = new JobConf(conf, ExecDriver.class);
+  }
+
+  @Override
+  public int execute(DriverContext driverContext) {
+    // check the local work
+    if (work == null) {
+      return -1;
+    }
+    fetchOperators = new HashMap<String, FetchOperator>();
+    Map<FetchOperator, JobConf> fetchOpJobConfMap = new HashMap<FetchOperator, JobConf>();
+    execContext.setJc(job);
+    // set the local work, so all the operators can get this context
+    execContext.setLocalWork(work);
+    boolean inputFileChangeSensitive = work.getInputFileChangeSensitive();
+    try {
+      initializeOperators(fetchOpJobConfMap);
+      // for each of the big table's buckets, start one forwarding pass
+      if (inputFileChangeSensitive) {
+        for (LinkedHashMap<String, ArrayList<String>> bigTableBucketFiles :
+            work.getBucketMapjoinContext().getAliasBucketFileNameMapping().values()) {
+          for (String bigTableBucket : bigTableBucketFiles.keySet()) {
+            startForward(inputFileChangeSensitive, bigTableBucket);
+          }
+        }
+      } else {
+        startForward(inputFileChangeSensitive, null);
+      }
+    } catch (Throwable e) {
+      if (e instanceof OutOfMemoryError) {
+        // don't create new objects if we are already out of memory
+        l4j.error("Out of Memory Error");
+      } else {
+        l4j.error("Hive Runtime Error: Map local work failed", e);
+      }
+      // report the failure instead of silently returning success
+      return 2;
+    }
+    return 0;
+  }
+
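+  /**
+   * Runs every small-table alias through its operator tree so that the
+   * JDBMSinkOperator at the bottom of that tree can build and persist the
+   * hash table files. For bucketed map joins this is invoked once per
+   * big-table bucket, so each bucket gets its own set of JDBM files.
+   */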
+  private void startForward(boolean inputFileChangeSensitive, String bigTableBucket)
+      throws Exception {
+    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
+      int fetchOpRows = 0;
+      String alias = entry.getKey();
+      FetchOperator fetchOp = entry.getValue();
+
+      if (inputFileChangeSensitive) {
+        fetchOp.clearFetchContext();
+        setUpFetchOpContext(fetchOp, alias, bigTableBucket);
+      }
+
+      // get the root operator
+      Operator<? extends Serializable> forwardOp = work.getAliasToWork().get(alias);
+      // walk through the operator tree
+      while (true) {
+        InspectableObject row = fetchOp.getNextRow();
+        if (row == null) {
+          if (inputFileChangeSensitive) {
+            String fileName = this.getFileName(bigTableBucket);
+            execContext.setCurrentBigBucketFile(fileName);
+            forwardOp.reset();
+          }
+          forwardOp.close(false);
+          break;
+        }
+        fetchOpRows++;
+        forwardOp.process(row.o, 0);
+        // check if any operator had a fatal error or early exit during
+        // execution
+        if (forwardOp.getDone()) {
+          //ExecMapper.setDone(true);
+          break;
+        }
+      }
+    }
+  }
+
+  private void initializeOperators(Map<FetchOperator, JobConf> fetchOpJobConfMap)
+      throws HiveException {
+    // create a fetch operator for every alias in the local work
+    for (Map.Entry<String, FetchWork> entry : work.getAliasToFetchWork().entrySet()) {
+      JobConf jobClone = new JobConf(job);
+
+      Operator<? extends Serializable> tableScan = work.getAliasToWork().get(entry.getKey());
+      boolean setColumnsNeeded = false;
+      if (tableScan instanceof TableScanOperator) {
+        ArrayList<Integer> list = ((TableScanOperator) tableScan).getNeededColumnIDs();
+        if (list != null) {
+          ColumnProjectionUtils.appendReadColumnIDs(jobClone, list);
+          setColumnsNeeded = true;
+        }
+      }
+
+      if (!setColumnsNeeded) {
+        ColumnProjectionUtils.setFullyReadColumns(jobClone);
+      }
+
+      // create a fetch operator
+      FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
+      fetchOpJobConfMap.put(fetchOp, jobClone);
+      fetchOperators.put(entry.getKey(), fetchOp);
+      l4j.info("fetch operator for " + entry.getKey() + " created");
+    }
+    // initialize all the forward operators
+    for (Map.Entry<String, FetchOperator> entry : fetchOperators.entrySet()) {
+      // get the forward op
+      Operator<? extends Serializable> forwardOp = work.getAliasToWork().get(entry.getKey());
+
+      // push the exec context into all the operators
+      forwardOp.setExecContext(execContext);
+      // all the operators need to be initialized before process
+      FetchOperator fetchOp = entry.getValue();
+      JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
+
+      if (jobConf == null) {
+        jobConf = job;
+      }
+      // initialize the forward operator
+      forwardOp.initialize(jobConf, new ObjectInspector[] {fetchOp.getOutputObjectInspector()});
+      l4j.info("fetch operator for " + entry.getKey() + " initialized");
+    }
+  }
+
+  private void setUpFetchOpContext(FetchOperator fetchOp, String alias, String currentInputFile)
+      throws Exception {
+    BucketMapJoinContext bucketMatcherCxt = this.work.getBucketMapjoinContext();
+
+    Class<? extends BucketMatcher> bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass();
+    BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(
+        bucketMatcherCls, null);
+    bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt
+        .getAliasBucketFileNameMapping());
+
+    List<Path> aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile,
+        bucketMatcherCxt.getMapJoinBigTableAlias(), alias);
+    Iterator<Path> iter = aliasFiles.iterator();
+    fetchOp.setupContext(iter, null);
+  }
+
+  private String getFileName(String path) {
+    if (path == null || path.length() == 0) {
+      return null;
+    }
+
+    int lastSeparator = path.lastIndexOf(Path.SEPARATOR) + 1;
+    String fileName = path.substring(lastSeparator);
+    return fileName;
+  }
+
+  @Override
+  public void localizeMRTmpFilesImpl(Context ctx) {
+  }
+
+  @Override
+  public String getName() {
+    return "MAPREDLOCAL";
+  }
+
+  @Override
+  public int getType() {
+    return StageType.MAPREDLOCAL;
+  }
+
+}
 Property changes on: ql/src/java/org/apache/hadoop/hive/ql/exec/MapredLocalTask.java
 ___________________________________________________________________
 Added: svn:executable
    + *
 Index: 
ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Operator.java (working copy) @@ -281,6 +281,10 @@ return true; } for (Operator parent : parentOperators) { + if (parent == null) { + //return true; + continue; + } if (parent.state != State.INIT) { return false; } @@ -427,6 +431,14 @@ initialize(hconf, null); } + public ObjectInspector[] getInputObjInspectors() { + return inputObjInspectors; + } + + public void setInputObjInspectors(ObjectInspector[] inputObjInspectors) { + this.inputObjInspectors = inputObjInspectors; + } + /** * Process the row. * @@ -501,6 +513,9 @@ protected boolean allInitializedParentsAreClosed() { if (parentOperators != null) { for (Operator parent : parentOperators) { + if(parent==null){ + continue; + } if (!(parent.state == State.CLOSE || parent.state == State.UNINIT)) { return false; } @@ -710,6 +725,16 @@ } } + public void reset(){ + this.state=State.INIT; + if (childOperators != null) { + for (Operator o : childOperators) { + o.reset(); + } + } + + } + /** * OperatorFunc. * Index: ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/OperatorFactory.java (working copy) @@ -28,6 +28,8 @@ import org.apache.hadoop.hive.ql.plan.FilterDesc; import org.apache.hadoop.hive.ql.plan.ForwardDesc; import org.apache.hadoop.hive.ql.plan.GroupByDesc; +import org.apache.hadoop.hive.ql.plan.JDBMDummyDesc; +import org.apache.hadoop.hive.ql.plan.JDBMSinkDesc; import org.apache.hadoop.hive.ql.plan.JoinDesc; import org.apache.hadoop.hive.ql.plan.LateralViewForwardDesc; import org.apache.hadoop.hive.ql.plan.LateralViewJoinDesc; @@ -85,6 +87,10 @@ LateralViewJoinOperator.class)); opvec.add(new OpTuple(LateralViewForwardDesc.class, LateralViewForwardOperator.class)); + opvec.add(new OpTuple(JDBMDummyDesc.class, + JDBMDummyOperator.class)); + opvec.add(new OpTuple(JDBMSinkDesc.class, + JDBMSinkOperator.class)); } public static Operator get(Class opClass) { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java (working copy) @@ -34,9 +34,10 @@ import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.MapJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredLocalWork; +import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.hive.ql.plan.MapredLocalWork.BucketMapJoinContext; -import org.apache.hadoop.hive.ql.plan.SMBJoinDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; +import org.apache.hadoop.hive.ql.util.JoinUtil; import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -106,10 +107,13 @@ HiveConf.ConfVars.HIVEMAPJOINBUCKETCACHESIZE); byte storePos = (byte) 0; for (Byte alias : order) { - RowContainer rc = getRowContainer(hconf, storePos, alias, bucketSize); + RowContainer rc = JoinUtil.getRowContainer(hconf, + 
rowContainerStandardObjectInspectors.get(storePos),
+          alias, bucketSize, spillTableDesc, conf, noOuterJoin);
       nextGroupStorage[storePos] = rc;
-      RowContainer candidateRC = getRowContainer(hconf, storePos, alias,
-          bucketSize);
+      RowContainer candidateRC = JoinUtil.getRowContainer(hconf,
+          rowContainerStandardObjectInspectors.get((byte) storePos),
+          alias, bucketSize, spillTableDesc, conf, noOuterJoin);
       candidateStorage[alias] = candidateRC;
       storePos++;
     }
 @@ -208,12 +212,15 @@
     byte alias = (byte) tag;
     // compute keys and values as StandardObjects
-    ArrayList<Object> key = computeKeys(row, joinKeys.get(alias),
+    ArrayList<Object> key = JoinUtil.computeKeys(row, joinKeys.get(alias),
         joinKeysObjectInspectors.get(alias));
-    ArrayList<Object> value = computeValues(row, joinValues.get(alias),
+    ArrayList<Object> value = JoinUtil.computeValues(row, joinValues.get(alias),
         joinValuesObjectInspectors.get(alias), joinFilters.get(alias),
         joinFilterObjectInspectors.get(alias), noOuterJoin);
+
+    // have we reached a new key group?
     boolean nextKeyGroup = processKey(alias, key);
     if (nextKeyGroup) {
 Index: ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java (revision 1023076)
 +++ ql/src/java/org/apache/hadoop/hive/ql/exec/SkewJoinHandler.java (working copy)
 @@ -36,6 +36,7 @@
  import org.apache.hadoop.hive.ql.metadata.HiveException;
  import org.apache.hadoop.hive.ql.plan.JoinDesc;
  import org.apache.hadoop.hive.ql.plan.TableDesc;
 +import org.apache.hadoop.hive.ql.util.JoinUtil;
  import org.apache.hadoop.hive.serde2.SerDe;
  import org.apache.hadoop.hive.serde2.SerDeException;
  import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 @@ -60,15 +61,15 @@
  * dir-T1-keys(containing keys which is big in T3), dir-T2-keys(containing big
  * keys in T3),dir-T3-bigkeys(containing keys which is big in T3), ... .....
  *
- * 
+ *

* For each skew key, we first write all values to a local tmp file. At the time * of ending the current group, the local tmp file will be uploaded to hdfs. * Right now, we use one file per skew key. - * + * *

* For more info, please see https://issues.apache.org/jira/browse/HIVE-964. - * + * */ public class SkewJoinHandler { @@ -89,6 +90,7 @@ private LongWritable skewjoinFollowupJobs; + private final boolean noOuterJoin; Configuration hconf = null; List dummyKey = null; String taskId; @@ -101,6 +103,7 @@ this.joinOp = joinOp; numAliases = joinOp.numAliases; conf = joinOp.getConf(); + noOuterJoin = joinOp.noOuterJoin; } public void initiliaze(Configuration hconf) { @@ -143,7 +146,7 @@ break; } - TableDesc valTblDesc = joinOp.getSpillTableDesc(alias); + TableDesc valTblDesc = JoinUtil.getSpillTableDesc(alias,joinOp.spillTableDesc,conf,noOuterJoin); List valColNames = new ArrayList(); if (valTblDesc != null) { valColNames = Utilities.getColumnNames(valTblDesc.getProperties()); Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TableScanOperator.java (working copy) @@ -26,15 +26,16 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.FileUtils; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.api.OperatorType; import org.apache.hadoop.hive.ql.stats.StatsPublisher; import org.apache.hadoop.hive.ql.stats.StatsSetupConst; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.hive.serde2.objectinspector.StructField; import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; import org.apache.hadoop.mapred.JobConf; /** @@ -50,6 +51,16 @@ private transient Configuration hconf; private transient Stat stat; private transient String partitionSpecs; + private TableDesc tableDesc; + + + public TableDesc getTableDesc() { + return tableDesc; + } + + public void setTableDesc(TableDesc tableDesc) { + this.tableDesc = tableDesc; + } /** * Other than gathering statistics for the ANALYZE command, the table scan operator Index: ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/TaskFactory.java (working copy) @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.ql.plan.ExplainWork; import org.apache.hadoop.hive.ql.plan.FetchWork; import org.apache.hadoop.hive.ql.plan.FunctionWork; +import org.apache.hadoop.hive.ql.plan.MapredLocalWork; import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.MoveWork; import org.apache.hadoop.hive.ql.plan.StatsWork; @@ -68,8 +69,12 @@ ConditionalTask.class)); taskvec.add(new taskTuple(MapredWork.class, MapRedTask.class)); + + taskvec.add(new taskTuple(MapredLocalWork.class, + MapredLocalTask.class)); taskvec.add(new taskTuple(StatsWork.class, - StatsTask.class)); + StatsTask.class)); + } private static ThreadLocal tid = new ThreadLocal() { Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java 
===================================================================
--- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java (revision 1023076)
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/HashMapWrapper.java (working copy)
@@ -21,6 +21,7 @@
 import java.io.File;
 import java.util.HashMap;
 import java.util.HashSet;
+import java.util.Map;
 import java.util.Properties;
 import java.util.Set;
@@ -72,7 +73,7 @@
 /**
  * Constructor.
- * 
+ *
  * @param threshold
  *          User specified threshold to store new values into persistent
  *          storage.
@@ -91,16 +92,73 @@
 }
 /**
- * Get the value based on the key. We try to get it from the main memory hash
- * table first. If it is not there we will look up the persistent hash table.
- * This function also guarantees if any item is found given a key, it is
- * available in main memory HashMap. So mutating the returned value will be
- * reflected (saved) in HashMapWrapper.
- * 
+ * Get the value based on the key. The in-memory MRU cache is searched first;
+ * on a miss the value is read from the JDBM storage and promoted into the
+ * MRU cache. The JDBM file is treated as read-only here: values are never
+ * written back to it.
  * @param key
  * @return Value corresponding to the key. If the key is not found, return
  *         null.
  */
+  public V getMapJoinValueObject(K key) throws HiveException {
+    V value = null;
+
+    // search the main-memory hash table first
+    MRUItem item = mHash.get(key);
+    if (item != null) {
+      value = item.value;
+      MRUList.moveToHead(item);
+    } else if (pHash != null) {
+      try {
+        value = (V) pHash.get(key);
+        if (value != null) {
+          if (mHash.size() < threshold) {
+            MRUItem itm = new MRUItem(key, value);
+            mHash.put(key, itm);
+            MRUList.put(itm);
+          } else if (threshold > 0) { // evict the least recently used item
+            MRUItem tail = MRUList.tail(); // least recently used item
+            // update mHash -- reuse MRUItem
+            item = mHash.remove(tail.key);
+            item.key = key;
+            item.value = value;
+            mHash.put(key, item);
+
+            // update MRU -- reusing MRUItem
+            tail.key = key;
+            tail.value = value;
+            MRUList.moveToHead(tail);
+          }
+        }
+      } catch (Exception e) {
+        LOG.warn(e.toString());
+        throw new HiveException(e);
+      }
+    }
+    return value;
+  }
 public V get(K key) throws HiveException {
   V value = null;
@@ -146,7 +204,7 @@
  * Put the key value pair in the hash table. It will first try to put it into
  * the main memory hash table. If the size exceeds the threshold, it will put
  * it into the persistent hash table.
- * 
+ *
  * @param key
  * @param value
  * @throws HiveException
@@ -208,9 +266,82 @@
   }
 }
+  public void putToJDBM(K key, V value) throws HiveException {
+    if (pHash == null) {
+      pHash = getPersistentHash();
+    }
+    try {
+      pHash.put(key, value);
+      recman.commit();
+    } catch (Exception e) {
+      LOG.warn(e.toString());
+      throw new HiveException(e);
+    }
+  }
+
+  /**
+   * Flush the main memory hash table into the persistent cache file.
+   *
+   * @return persistent cache file
+   */
+  public String flushMemoryCacheToPersistent() throws HiveException {
+    try {
+      // if there is no persistent cache file yet, create a new one
+      if (pHash == null) {
+        pHash = getPersistentHash();
+      }
+      int mm_size = mHash.size();
+      // no data in the memory cache
+      if (mm_size == 0) {
+        return tmpFile.getAbsolutePath();
+      }
+      // iterate over the memory hash table and put the entries into the persistent file
+      for (Map.Entry<K, MRUItem> entry : mHash.entrySet()) {
+        K key = entry.getKey();
+        MRUItem item = entry.getValue();
+        pHash.put(key, item.value);
+      }
+      // commit to the persistent file
+      recman.commit();
+
+      // release the memory
+      mHash.clear();
+    } catch (Exception e) {
+      LOG.warn(e.toString());
+      throw new HiveException(e);
+    }
+    return tmpFile.getAbsolutePath();
+  }
+
+  public void initilizePersistentHash(File jdbmfile) throws HiveException {
+    try {
+      Properties props = new Properties();
+      props.setProperty(RecordManagerOptions.CACHE_TYPE,
+          RecordManagerOptions.NORMAL_CACHE);
+      props.setProperty(RecordManagerOptions.DISABLE_TRANSACTIONS, "true");
+
+      recman = RecordManagerFactory.createRecordManager(jdbmfile, props);
+      long recid = recman.getNamedObject("hashtable");
+      if (recid != 0) {
+        LOG.info("Reloading the existing hashtable from the JDBM file");
+        pHash = HTree.load(recman, recid);
+      } else {
+        LOG.warn("Failed to initialize the hash table from the JDBM file: "
+            + "no named 'hashtable' record was found");
+        throw new HiveException();
+      }
+    } catch (Exception e) {
+      LOG.warn(e.toString());
+      throw new HiveException(e);
+    }
+  }
+
 /**
  * Get the persistent hash table.
- * 
+ *
  * @return persistent hash table
  * @throws HiveException
  */
@@ -234,6 +365,9 @@
       recman = RecordManagerFactory.createRecordManager(tmpFile, props);
       pHash = HTree.createInstance(recman);
+      recman.setNamedObject("hashtable", pHash.getRecid());
+      // commit so the named record is visible to readers of the file
+      recman.commit();
     } catch (Exception e) {
       LOG.warn(e.toString());
       throw new HiveException(e);
@@ -259,7 +393,7 @@
  * the pairs are removed from the main memory hash table, pairs in the
  * persistent hash table will not be moved to the main memory hash table.
  * Future inserted elements will go into the main memory hash table though.
- * 
+ *
  * @param key
  * @throws HiveException
  */
@@ -279,7 +413,7 @@
 /**
  * Get a list of all keys in the hash map.
- * 
+ *
  * @return
  */
 public Set<K> keySet() {
@@ -306,7 +440,7 @@
 /**
  * Get the main memory cache capacity.
- * 
+ *
  * @return the maximum number of items can be put into main memory HashMap
  *         cache.
  */
@@ -316,7 +450,7 @@
 /**
  * Close the persistent hash table and clean it up.
- * + * * @throws HiveException */ public void close() throws HiveException { @@ -330,8 +464,10 @@ throw new HiveException(e); } // delete the temporary file - tmpFile.delete(); - tmpFile = null; + if(tmpFile != null){ + tmpFile.delete(); + tmpFile = null; + } pHash = null; recman = null; } Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (revision 1023355) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectKey.java (working copy) @@ -24,8 +24,8 @@ import java.io.ObjectOutput; import java.util.ArrayList; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx; +import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; +import org.apache.hadoop.hive.ql.exec.JDBMSinkOperator.JDBMSinkObjectCtx; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; @@ -81,7 +81,7 @@ metadataTag = in.readInt(); // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + JDBMSinkObjectCtx ctx = MapJoinMetaData.get( Integer.valueOf(metadataTag)); Writable val = ctx.getSerDe().getSerializedClass().newInstance(); @@ -89,6 +89,9 @@ obj = (ArrayList) ObjectInspectorUtils.copyToStandardObject(ctx .getSerDe().deserialize(val), ctx.getSerDe().getObjectInspector(), ObjectInspectorCopyOption.WRITABLE); + if(obj == null){ + obj = new ArrayList(0); + } } catch (Exception e) { throw new IOException(e); } @@ -99,9 +102,8 @@ public void writeExternal(ObjectOutput out) throws IOException { try { out.writeInt(metadataTag); - // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + JDBMSinkObjectCtx ctx = MapJoinMetaData.get( Integer.valueOf(metadataTag)); // Different processing for key and value Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/MapJoinObjectValue.java (working copy) @@ -28,8 +28,8 @@ import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.conf.HiveConf; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator; -import org.apache.hadoop.hive.ql.exec.MapJoinOperator.MapJoinObjectCtx; +import org.apache.hadoop.hive.ql.exec.MapJoinMetaData; +import org.apache.hadoop.hive.ql.exec.JDBMSinkOperator.JDBMSinkObjectCtx; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; @@ -85,10 +85,11 @@ public void readExternal(ObjectInput in) throws IOException, ClassNotFoundException { try { + metadataTag = in.readInt(); // get the tableDesc from the map stored in the mapjoin operator - MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get( + JDBMSinkObjectCtx ctx = MapJoinMetaData.get( Integer.valueOf(metadataTag)); int sz = in.readInt(); @@ -110,6 +111,11 @@ res.add(memObj); } } + else{ + 
for (int i = 0; i < sz; i++) {
+          res.add(new ArrayList<Object>(0));
+        }
+      }
     }
     obj = res;
   } catch (Exception e) {
@@ -124,7 +130,7 @@
     out.writeInt(metadataTag);
     // get the tableDesc from the map stored in the mapjoin operator
-    MapJoinObjectCtx ctx = MapJoinOperator.getMapMetadata().get(
+    JDBMSinkObjectCtx ctx = MapJoinMetaData.get(
         Integer.valueOf(metadataTag));
     // Different processing for key and value
 Index: ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (revision 1023076)
 +++ ql/src/java/org/apache/hadoop/hive/ql/exec/persistence/RowContainer.java (working copy)
 @@ -30,8 +30,8 @@
  import org.apache.hadoop.fs.LocalFileSystem;
  import org.apache.hadoop.fs.Path;
  import org.apache.hadoop.hive.conf.HiveConf;
 +import org.apache.hadoop.hive.ql.exec.Utilities;
  import org.apache.hadoop.hive.ql.exec.FileSinkOperator.RecordWriter;
 -import org.apache.hadoop.hive.ql.exec.Utilities;
  import org.apache.hadoop.hive.ql.io.HiveFileFormatUtils;
  import org.apache.hadoop.hive.ql.io.HiveOutputFormat;
  import org.apache.hadoop.hive.ql.metadata.HiveException;
 @@ -77,7 +77,7 @@
  */
 public class RowContainer<Row extends List<Object>> {
-  protected Log LOG = LogFactory.getLog(this.getClass().getName());
+  protected static Log LOG = LogFactory.getLog(RowContainer.class);
   // max # of rows can be put into one block
   private static final int BLOCKSIZE = 25000;
 @@ -116,6 +116,7 @@
   Writable val = null; // cached to use serialize data
+  Configuration jc;
   JobConf jobCloneUsingLocalFs = null;
   private LocalFileSystem localFs;
 @@ -136,16 +137,19 @@
     this.firstReadBlockPointer = currentReadBlock;
     this.serde = null;
     this.standardOI = null;
-    try {
-      this.localFs = FileSystem.getLocal(jc);
-    } catch (IOException e) {
-      throw new HiveException(e);
+    this.jc = jc;
+  }
+
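+  // Note (an assumption about the intent of this change): the JobConf clone
+  // is now created lazily because most RowContainers never spill to local
+  // disk, so the relatively expensive JobConf copy can be deferred until a
+  // local-FS read or write actually happens.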
+  private JobConf getLocalFSJobConfClone(Configuration jc) {
+    if (this.jobCloneUsingLocalFs == null) {
+      this.jobCloneUsingLocalFs = new JobConf(jc);
+      HiveConf.setVar(jobCloneUsingLocalFs, HiveConf.ConfVars.HADOOPFS,
+          Utilities.HADOOP_LOCAL_FS);
     }
-    this.jobCloneUsingLocalFs = new JobConf(jc);
-    HiveConf.setVar(jobCloneUsingLocalFs, HiveConf.ConfVars.HADOOPFS,
-        Utilities.HADOOP_LOCAL_FS);
+    return this.jobCloneUsingLocalFs;
   }
+
 public RowContainer(int blockSize, SerDe sd, ObjectInspector oi,
     Configuration jc) throws HiveException {
   this(blockSize, jc);
@@ -202,23 +206,24 @@
       this.readBlockSize = this.addCursor;
       this.currentReadBlock = this.currentWriteBlock;
     } else {
+      JobConf localJc = getLocalFSJobConfClone(jc);
       if (inputSplits == null) {
         if (this.inputFormat == null) {
           inputFormat = (InputFormat) ReflectionUtils
               .newInstance(tblDesc.getInputFileFormatClass(),
-              jobCloneUsingLocalFs);
+              localJc);
         }
-        HiveConf.setVar(jobCloneUsingLocalFs,
+        HiveConf.setVar(localJc,
            HiveConf.ConfVars.HADOOPMAPREDINPUTDIR,
            org.apache.hadoop.util.StringUtils.escapeString(parentFile
                .getAbsolutePath()));
-        inputSplits = inputFormat.getSplits(jobCloneUsingLocalFs, 1);
+        inputSplits = inputFormat.getSplits(localJc, 1);
         acutalSplitNum = inputSplits.length;
       }
       currentSplitPointer = 0;
       rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer],
-          jobCloneUsingLocalFs, Reporter.NULL);
+          localJc, Reporter.NULL);
       currentSplitPointer++;
       nextBlock();
@@ -315,6 +320,7 @@
       HiveOutputFormat<?, ?> hiveOutputFormat = tblDesc
           .getOutputFileFormatClass().newInstance();
       tempOutPath = new Path(tmpFile.toString());
+      JobConf localJc = getLocalFSJobConfClone(jc);
-      rw = HiveFileFormatUtils.getRecordWriter(this.jobCloneUsingLocalFs,
+      rw = HiveFileFormatUtils.getRecordWriter(localJc,
           hiveOutputFormat, serde.getSerializedClass(), false, tblDesc
               .getProperties(), tempOutPath);
@@ -389,6 +395,7 @@
     }
     if (nextSplit && this.currentSplitPointer < this.acutalSplitNum) {
+      JobConf localJc = getLocalFSJobConfClone(jc);
       // open record reader to read next split
-      rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer],
-          jobCloneUsingLocalFs, Reporter.NULL);
+      rr = inputFormat.getRecordReader(inputSplits[currentSplitPointer],
+          localJc, Reporter.NULL);
 Index: ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java (revision 1023076)
 +++ ql/src/java/org/apache/hadoop/hive/ql/io/IOContext.java (working copy)
 @@ -29,12 +29,11 @@
  */
 public class IOContext {
-  private static ThreadLocal<IOContext> threadLocal = new ThreadLocal<IOContext>();
-  static {
-    if (threadLocal.get() == null) {
-      threadLocal.set(new IOContext());
-    }
-  }
+  private static ThreadLocal<IOContext> threadLocal = new ThreadLocal<IOContext>() {
+    @Override
+    protected synchronized IOContext initialValue() {
+      return new IOContext();
+    }
+  };
   public static IOContext get() {
     return IOContext.threadLocal.get();
 Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (revision 1023076)
 +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/GenMRSkewJoinProcessor.java (working copy)
 @@ -247,6 +247,7 @@
     for (int k = 0; k < tags.length; k++) {
       Operator<? extends Serializable> ts = OperatorFactory.get(
           TableScanDesc.class, (RowSchema) null);
+      ((TableScanOperator) ts).setTableDesc(tableDescList.get((byte) k));
       parentOps[k] = ts;
     }
     Operator<? extends Serializable> tblScan_op = parentOps[i];
 Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java (revision 0)
 +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java (revision 0)
 @@ -0,0 +1,174 @@
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements. See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership. The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License. You may obtain a copy of the License at
 + *
 + * http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Stack;
+
+import org.apache.hadoop.hive.ql.exec.JDBMDummyOperator;
+import org.apache.hadoop.hive.ql.exec.JDBMSinkOperator;
+import org.apache.hadoop.hive.ql.exec.MapJoinOperator;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.OperatorFactory;
+import org.apache.hadoop.hive.ql.exec.RowSchema;
+import org.apache.hadoop.hive.ql.exec.TableScanOperator;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.optimizer.physical.MapJoinResolver.LocalMapJoinProcCtx;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.JDBMDummyDesc;
+import org.apache.hadoop.hive.ql.plan.JDBMSinkDesc;
+import org.apache.hadoop.hive.ql.plan.PlanUtils;
+import org.apache.hadoop.hive.ql.plan.TableDesc;
+
+/**
+ * Node processor factory for the map join resolver.
+ */
+public final class LocalMapJoinProcFactory {
+
+  public static NodeProcessor getJoinProc() {
+    return new LocalMapJoinProcessor();
+  }
+
+  public static NodeProcessor getMapJoinMapJoinProc() {
+    return new MapJoinMapJoinProc();
+  }
+
+  public static NodeProcessor getDefaultProc() {
+    return new NodeProcessor() {
+      @Override
+      public Object process(Node nd, Stack<Node> stack,
+          NodeProcessorCtx procCtx, Object... nodeOutputs)
+          throws SemanticException {
+        return null;
+      }
+    };
+  }
+
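+  /*
+   * The LocalMapJoinProcessor below rewrites the operator tree of the local
+   * work. Roughly (an editorial sketch of what the code does):
+   *
+   *   before:                smallTableScan -> MapJoin <- bigTableScan
+   *   after, local task:     smallTableScan -> JDBMSink
+   *   after, map-side task:  JDBMDummy -> MapJoin <- bigTableScan
+   *
+   * The JDBM sink persists each small table as a JDBM hash file, while the
+   * dummy operators only carry the table desc so the map-side join can still
+   * initialize its object inspectors.
+   */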
+  /**
+   * LocalMapJoinProcessor.
+   */
+  public static class LocalMapJoinProcessor implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
+
+      if (!nd.getName().equals("MAPJOIN")) {
+        return null;
+      }
+      MapJoinOperator mapJoinOp = (MapJoinOperator) nd;
+
+      // create a new operator: JDBMSinkOperator
+      JDBMSinkDesc jdbmSinkDesc = new JDBMSinkDesc(mapJoinOp.getConf());
+      JDBMSinkOperator jdbmSinkOp = (JDBMSinkOperator) OperatorFactory.get(jdbmSinkDesc);
+
+      // get the position of the big table
+      int bigTable = mapJoinOp.getConf().getPosBigTable();
+      Byte[] order = mapJoinOp.getConf().getTagOrder();
+      int bigTableAlias = (int) order[bigTable];
+
+      Operator<? extends Serializable> bigOp = mapJoinOp.getParentOperators().get(bigTable);
+
+      // the parent ops for jdbmSinkOp
+      List<Operator<? extends Serializable>> smallTablesParentOp =
+          new ArrayList<Operator<? extends Serializable>>();
+
+      List<Operator<? extends Serializable>> dummyOperators =
+          new ArrayList<Operator<? extends Serializable>>();
+      // get all parents
+      List<Operator<? extends Serializable>> parentsOp = mapJoinOp.getParentOperators();
+      for (int i = 0; i < parentsOp.size(); i++) {
+        if (i == bigTableAlias) {
+          continue;
+        }
+        Operator<? extends Serializable> parent = parentsOp.get(i);
+        // let jdbmSinkOp be the child of this parent
+        parent.replaceChild(mapJoinOp, jdbmSinkOp);
+        // keep the parent id correct
+        smallTablesParentOp.add(parent);
+
+        // create a new operator, JDBMDummyOperator, which shares the table desc
+        JDBMDummyDesc desc = new JDBMDummyDesc();
+        JDBMDummyOperator dummyOp = (JDBMDummyOperator) OperatorFactory.get(desc);
+        TableDesc tbl;
+
+        if (parent.getSchema() == null) {
+          if (parent instanceof TableScanOperator) {
+            tbl = ((TableScanOperator) parent).getTableDesc();
+          } else {
+            throw new SemanticException("unexpected parent operator without a schema");
+          }
+        } else {
+          // get the parent schema
+          RowSchema rowSchema = parent.getSchema();
+          tbl = PlanUtils.getIntermediateFileTableDesc(PlanUtils
+              .getFieldSchemasFromRowSchema(rowSchema, ""));
+        }
+
+        dummyOp.getConf().setTbl(tbl);
+
+        // let the dummy op be the parent of the map join op
+        mapJoinOp.replaceParent(parent, dummyOp);
+        List<Operator<? extends Serializable>> dummyChildren =
+            new ArrayList<Operator<? extends Serializable>>();
+        dummyChildren.add(mapJoinOp);
+        dummyOp.setChildOperators(dummyChildren);
+
+        // add this dummy op to the dummy operator list
+        dummyOperators.add(dummyOp);
+      }
+
+      jdbmSinkOp.setParentOperators(smallTablesParentOp);
+      for (Operator<? extends Serializable> op : dummyOperators) {
+        context.addDummyParentOp(op);
+      }
+      return null;
+    }
+  }
+
+  /**
+   * MapJoinMapJoinProc.
+   */
+  public static class MapJoinMapJoinProc implements NodeProcessor {
+    public Object process(Node nd, Stack<Node> stack, NodeProcessorCtx ctx,
+        Object... nodeOutputs) throws SemanticException {
+      LocalMapJoinProcCtx context = (LocalMapJoinProcCtx) ctx;
+      if (!nd.getName().equals("MAPJOIN")) {
+        return null;
+      }
+      // placeholder: a map join above another map join is not handled specially yet
+      return null;
+    }
+  }
+
+  private LocalMapJoinProcFactory() {
+    // prevent instantiation
+  }
+}
 Property changes on: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/LocalMapJoinProcFactory.java
 ___________________________________________________________________
 Added: svn:executable
    + *
 Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java (revision 0)
 +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java (revision 0)
 @@ -0,0 +1,287 @@
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements. See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.optimizer.physical;
+
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.Stack;
+
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.ql.Context;
+import org.apache.hadoop.hive.ql.exec.ConditionalTask;
+import org.apache.hadoop.hive.ql.exec.MapredLocalTask;
+import org.apache.hadoop.hive.ql.exec.Operator;
+import org.apache.hadoop.hive.ql.exec.Task;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.lib.DefaultGraphWalker;
+import org.apache.hadoop.hive.ql.lib.DefaultRuleDispatcher;
+import org.apache.hadoop.hive.ql.lib.Dispatcher;
+import org.apache.hadoop.hive.ql.lib.GraphWalker;
+import org.apache.hadoop.hive.ql.lib.Node;
+import org.apache.hadoop.hive.ql.lib.NodeProcessor;
+import org.apache.hadoop.hive.ql.lib.NodeProcessorCtx;
+import org.apache.hadoop.hive.ql.lib.Rule;
+import org.apache.hadoop.hive.ql.lib.RuleRegExp;
+import org.apache.hadoop.hive.ql.parse.ParseContext;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.ConditionalWork;
+import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
+import org.apache.hadoop.hive.ql.plan.MapredWork;
+import org.apache.hadoop.hive.ql.plan.ConditionalResolverSkewJoin.ConditionalResolverSkewJoinCtx;
+
+/**
+ * An implementation of PhysicalPlanResolver. It iterates over each MapRedTask
+ * to see whether the task has local map work; if it does, it moves that local
+ * work into a new MapredLocalTask, makes the new task depend on the current
+ * task's parent tasks, and makes the current task depend on the new task.
+ */
+public class MapJoinResolver implements PhysicalPlanResolver {
+  @Override
+  public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
+
+    // create the dispatcher and the graph walker
+    Dispatcher disp = new LocalMapJoinTaskDispatcher(pctx);
+    GraphWalker ogw = new DefaultGraphWalker(disp);
+
+    // get all the task nodes from the root tasks
+    ArrayList<Node> topNodes = new ArrayList<Node>();
+    topNodes.addAll(pctx.rootTasks);
+
+    // begin to walk through the task tree
+    ogw.startWalking(topNodes, null);
+    return pctx;
+  }
+
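+  /*
+   * Resulting task layout (editorial sketch): where the original plan had
+   *
+   *   parentTask -> MapRedTask(map join with local work)
+   *
+   * the dispatcher below produces
+   *
+   *   parentTask -> MapredLocalTask(small tables -> JDBMSink)
+   *              -> MapRedTask(map join whose local work is replaced
+   *                            by the dummy parent operators)
+   */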
+  /**
+   * Iterates over the tasks. If a task has local work, creates a new
+   * MapredLocalTask for that local work, makes the new task depend on the
+   * current task's parent tasks, and makes the current task depend on the
+   * new task.
+   */
+  class LocalMapJoinTaskDispatcher implements Dispatcher {
+
+    private PhysicalContext physicalContext;
+
+    public LocalMapJoinTaskDispatcher(PhysicalContext context) {
+      super();
+      physicalContext = context;
+    }
+
+    private void processCurrentTask(Task<? extends Serializable> currTask,
+        ConditionalTask conditionalTask) throws SemanticException {
+
+      // get the current mapred work and its local work
+      MapredWork mapredWork = (MapredWork) currTask.getWork();
+      MapredLocalWork localwork = mapredWork.getMapLocalWork();
+
+      if (localwork != null) {
+        // get the context info and set up the shared tmp URI
+        Context ctx = physicalContext.getContext();
+        String tmpFileURI = ctx.getLocalTmpFileURI() + Path.SEPARATOR + "JDBM-" + currTask.getId();
+        localwork.setTmpFileURI(tmpFileURI);
+        mapredWork.setTmpHDFSFileURI(ctx.getMRTmpFileURI() + Path.SEPARATOR + "JDBM-" + currTask.getId());
+        // create a task for this local work; right now this local work is shared
+        // by the original MapredTask and the newly generated MapredLocalTask
+        MapredLocalTask localTask = (MapredLocalTask) TaskFactory.get(localwork,
+            physicalContext.getParseContext().getConf());
+
+        // replace the map join operator with a JDBM sink operator in the local
+        // operator tree and return all the dummy parents
+        List<Operator<? extends Serializable>> dummyOps = adjustLocalTask(localTask);
+
+        // create the new local work and set up the dummy ops
+        MapredLocalWork newLocalWork = new MapredLocalWork();
+        newLocalWork.setDummyParentOp(dummyOps);
+        newLocalWork.setTmpFileURI(tmpFileURI);
+        newLocalWork.setInputFileChangeSensitive(localwork.getInputFileChangeSensitive());
+        mapredWork.setMapLocalWork(newLocalWork);
+
+        // get all parent tasks
+        List<Task<? extends Serializable>> parentTasks = currTask.getParentTasks();
+        currTask.setParentTasks(null);
+        if (parentTasks != null) {
+          for (Task<? extends Serializable> tsk : parentTasks) {
+            // make the new task depend on all the parent tasks of the current task
+ tsk.addDependentTask(localTask); + //remove the current task from its original parent task's dependent task + tsk.removeDependentTask(currTask); + } + + }else{ + //in this case, current task is in the root tasks + //so add this new task into root tasks and remove the current task from root tasks + if(conditionalTask== null){ + physicalContext.addToRootTask(localTask); + physicalContext.removeFromRootTask(currTask); + }else{ + //set list task + List> listTask = conditionalTask.getListTasks(); + ConditionalWork conditionalWork= conditionalTask.getWork(); + int index = listTask.indexOf(currTask); + listTask.set(index, localTask); + + //set list work + List listWork = (List)conditionalWork.getListWorks(); + index = listWork.indexOf(mapredWork); + listWork.set(index,(Serializable)localwork); + conditionalWork.setListWorks(listWork); + + //get bigKeysDirToTaskMap + ConditionalResolverSkewJoinCtx context = + (ConditionalResolverSkewJoinCtx) conditionalTask.getResolverCtx(); + HashMap> bigKeysDirToTaskMap = + context.getDirToTaskMap(); + + //to avoid concurrent modify the hashmap + HashMap> newbigKeysDirToTaskMap = + new HashMap>(); + + + //reset the resolver + for(Map.Entry> entry: bigKeysDirToTaskMap.entrySet()){ + Task task = entry.getValue(); + String key = entry.getKey(); + + if(task.equals(currTask)){ + newbigKeysDirToTaskMap.put(key, localTask); + }else{ + newbigKeysDirToTaskMap.put(key, task); + } + } + + context.setDirToTaskMap(newbigKeysDirToTaskMap); + conditionalTask.setResolverCtx(context); + } + } + + //make current task depends on this new generated localMapJoinTask + //now localTask is the parent task of the current task + localTask.addDependentTask(currTask); + + } + + } + + @Override + public Object dispatch(Node nd, Stack stack, Object... 
nodeOutputs) + throws SemanticException { + Task currTask = (Task) nd; + //not map reduce task or not conditional task, just skip + if(currTask.isMapRedTask() ){ + if(currTask instanceof ConditionalTask){ + //get the list of task + List> taskList = ((ConditionalTask) currTask).getListTasks(); + for(Task tsk : taskList){ + if(tsk.isMapRedTask()){ + this.processCurrentTask(tsk,((ConditionalTask) currTask)); + } + } + }else{ + this.processCurrentTask(currTask,null); + } + } + return null; + } + + //replace the map join operator to local_map_join operator in the operator tree + private List> adjustLocalTask(MapredLocalTask task) throws SemanticException { + + LocalMapJoinProcCtx localMapJoinProcCtx = new LocalMapJoinProcCtx(task, + physicalContext.getParseContext()); + + Map opRules = new LinkedHashMap(); + //opRules.put(new RuleRegExp("R1", "MAPJOIN%.*MAPJOIN%"), + //LocalMapJoinProcFactory.getMapJoinMapJoinProc()); + opRules.put(new RuleRegExp("R1", "MAPJOIN%"), LocalMapJoinProcFactory.getJoinProc()); + + // The dispatcher fires the processor corresponding to the closest + // matching rule and passes the context along + Dispatcher disp = new DefaultRuleDispatcher(LocalMapJoinProcFactory + .getDefaultProc(), opRules, localMapJoinProcCtx); + GraphWalker ogw = new DefaultGraphWalker(disp); + + // iterator the reducer operator tree + ArrayList topNodes = new ArrayList(); + + topNodes.addAll(task.getWork().getAliasToWork().values()); + ogw.startWalking(topNodes, null); + + return localMapJoinProcCtx.getDummyParentOp(); + + } + + public PhysicalContext getPhysicalContext() { + return physicalContext; + } + + public void setPhysicalContext(PhysicalContext physicalContext) { + this.physicalContext = physicalContext; + } + } + /** + * A container of current task and parse context. 
+ */ + public static class LocalMapJoinProcCtx implements NodeProcessorCtx { + private Task currentTask; + private ParseContext parseCtx; + private List> dummyParentOp = null; + + public LocalMapJoinProcCtx(Task task, + ParseContext parseCtx) { + currentTask = task; + this.parseCtx = parseCtx; + dummyParentOp = new ArrayList>(); + } + + public Task getCurrentTask() { + return currentTask; + } + + public void setCurrentTask(Task currentTask) { + this.currentTask = currentTask; + } + + public ParseContext getParseCtx() { + return parseCtx; + } + + public void setParseCtx(ParseContext parseCtx) { + this.parseCtx = parseCtx; + } + + public void setDummyParentOp(List> op){ + this.dummyParentOp=op; + } + + public List> getDummyParentOp(){ + return this.dummyParentOp; + } + public void addDummyParentOp(Operator op){ + this.dummyParentOp.add(op); + } + + } +} + Property changes on: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/MapJoinResolver.java ___________________________________________________________________ Added: svn:executable + * Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalContext.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalContext.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalContext.java (working copy) @@ -72,4 +72,11 @@ this.context = context; } + public void addToRootTask(Task tsk){ + rootTasks.add(tsk); + } + public void removeFromRootTask(Task tsk){ + rootTasks.remove(tsk); + } + } Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/PhysicalOptimizer.java (working copy) @@ -41,7 +41,7 @@ /** * create the list of physical plan resolvers. - * + * * @param hiveConf */ private void initialize(HiveConf hiveConf) { @@ -49,11 +49,12 @@ if (hiveConf.getBoolVar(HiveConf.ConfVars.HIVESKEWJOIN)) { resolvers.add(new SkewJoinResolver()); } + resolvers.add(new MapJoinResolver()); } /** * invoke all the resolvers one-by-one, and alter the physical plan. 
- * + * * @return PhysicalContext * @throws HiveException */ Index: ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/SkewJoinResolver.java (working copy) @@ -52,7 +52,7 @@ ArrayList topNodes = new ArrayList(); topNodes.addAll(pctx.rootTasks); ogw.startWalking(topNodes, null); - return null; + return pctx; } /** Index: ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverSkewJoin.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverSkewJoin.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/ConditionalResolverSkewJoin.java (working copy) @@ -56,7 +56,7 @@ */ public ConditionalResolverSkewJoinCtx() { } - + public ConditionalResolverSkewJoinCtx( HashMap> dirToTaskMap) { super(); @@ -95,7 +95,16 @@ FileSystem inpFs = dirPath.getFileSystem(conf); FileStatus[] fstatus = inpFs.listStatus(dirPath); if (fstatus.length > 0) { - resTsks.add(entry.getValue()); + Task task = entry.getValue(); + List> parentOps = task.getParentTasks(); + if(parentOps!=null){ + for(Task parentOp: parentOps){ + //right now only one parent + resTsks.add(parentOp); + } + }else{ + resTsks.add(task); + } } } } catch (IOException e) { Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMDummyDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMDummyDesc.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMDummyDesc.java (revision 0) @@ -0,0 +1,38 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.plan; + +import java.io.Serializable; +/** + * JDBM Dummy Descriptor implementation. 
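+ * At runtime the dummy operator stands in for a small-table parent of the
+ * map join; it carries only the TableDesc, since the actual small-table rows
+ * are read back from the JDBM file instead of flowing through the operator
+ * tree.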
+ *
+ */
+@Explain(displayName = "JDBMDummy Operator")
+public class JDBMDummyDesc implements Serializable {
+  private static final long serialVersionUID = 1L;
+
+  private TableDesc tbl;
+
+  public TableDesc getTbl() {
+    return tbl;
+  }
+
+  public void setTbl(TableDesc tbl) {
+    this.tbl = tbl;
+  }
+
+}
 Property changes on: ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMDummyDesc.java
 ___________________________________________________________________
 Added: svn:executable
    + *
 Index: ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMSinkDesc.java
 ===================================================================
 --- ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMSinkDesc.java (revision 0)
 +++ ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMSinkDesc.java (revision 0)
 @@ -0,0 +1,355 @@
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements. See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership. The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License. You may obtain a copy of the License at
 + *
 + * http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing, software
 + * distributed under the License is distributed on an "AS IS" BASIS,
 + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 + * See the License for the specific language governing permissions and
 + * limitations under the License.
 + */
 +
 +package org.apache.hadoop.hive.ql.plan;
 +
 +import java.io.Serializable;
 +import java.util.ArrayList;
 +import java.util.HashMap;
 +import java.util.Iterator;
 +import java.util.LinkedHashMap;
 +import java.util.List;
 +import java.util.Map;
 +import java.util.Set;
 +import java.util.Map.Entry;
 +
 +/**
 + * JDBM Sink operator Descriptor implementation.
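+ * It is constructed from the original MapJoinDesc: the JDBMSinkDesc(MapJoinDesc)
+ * constructor copies the join-related fields so the sink can serialize the
+ * small tables exactly as the map join would have consumed them.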
+ * + */ +@Explain(displayName = "JDBM Sink Operator") +public class JDBMSinkDesc extends JoinDesc implements Serializable { + private static final long serialVersionUID = 1L; + + + // used to handle skew join + private boolean handleSkewJoin = false; + private int skewKeyDefinition = -1; + private Map bigKeysDirMap; + private Map> smallKeysDirMap; + private Map skewKeysValuesTables; + + // alias to key mapping + private Map> exprs; + + // alias to filter mapping + private Map> filters; + + // used for create joinOutputObjectInspector + protected List outputColumnNames; + + // key:column output name, value:tag + private transient Map reversedExprs; + + // No outer join involved + protected boolean noOuterJoin; + + protected JoinCondDesc[] conds; + + protected Byte[] tagOrder; + private TableDesc keyTableDesc; + + + private Map> keys; + private TableDesc keyTblDesc; + private List valueTblDescs; + + private int posBigTable; + + private Map> retainList; + + private transient String bigTableAlias; + + private LinkedHashMap>> aliasBucketFileNameMapping; + private LinkedHashMap bucketFileNameMapping; + + public JDBMSinkDesc() { + bucketFileNameMapping = new LinkedHashMap(); + } + + public JDBMSinkDesc(MapJoinDesc clone) { + this.bigKeysDirMap = clone.getBigKeysDirMap(); + this.conds = clone.getConds(); + this.exprs= clone.getExprs(); + this.handleSkewJoin = clone.getHandleSkewJoin(); + this.keyTableDesc = clone.getKeyTableDesc(); + this.noOuterJoin = clone.getNoOuterJoin(); + this.outputColumnNames = clone.getOutputColumnNames(); + this.reversedExprs = clone.getReversedExprs(); + this.skewKeyDefinition = clone.getSkewKeyDefinition(); + this.skewKeysValuesTables = clone.getSkewKeysValuesTables(); + this.smallKeysDirMap = clone.getSmallKeysDirMap(); + this.tagOrder = clone.getTagOrder(); + this.filters = clone.getFilters(); + + this.keys = clone.getKeys(); + this.keyTblDesc = clone.getKeyTblDesc(); + this.valueTblDescs = clone.getValueTblDescs(); + this.posBigTable = clone.getPosBigTable(); + this.retainList = clone.getRetainList(); + this.bigTableAlias = clone.getBigTableAlias(); + this.aliasBucketFileNameMapping = clone.getAliasBucketFileNameMapping(); + this.bucketFileNameMapping = clone.getBucketFileNameMapping(); + } + + + private void initRetainExprList() { + retainList = new HashMap>(); + Set>> set = exprs.entrySet(); + Iterator>> setIter = set.iterator(); + while (setIter.hasNext()) { + Entry> current = setIter.next(); + List list = new ArrayList(); + for (int i = 0; i < current.getValue().size(); i++) { + list.add(i); + } + retainList.put(current.getKey(), list); + } + } + + public boolean isHandleSkewJoin() { + return handleSkewJoin; + } + + @Override + public void setHandleSkewJoin(boolean handleSkewJoin) { + this.handleSkewJoin = handleSkewJoin; + } + + @Override + public int getSkewKeyDefinition() { + return skewKeyDefinition; + } + + @Override + public void setSkewKeyDefinition(int skewKeyDefinition) { + this.skewKeyDefinition = skewKeyDefinition; + } + + @Override + public Map getBigKeysDirMap() { + return bigKeysDirMap; + } + + @Override + public void setBigKeysDirMap(Map bigKeysDirMap) { + this.bigKeysDirMap = bigKeysDirMap; + } + + @Override + public Map> getSmallKeysDirMap() { + return smallKeysDirMap; + } + + @Override + public void setSmallKeysDirMap(Map> smallKeysDirMap) { + this.smallKeysDirMap = smallKeysDirMap; + } + + @Override + public Map getSkewKeysValuesTables() { + return skewKeysValuesTables; + } + + @Override + public void setSkewKeysValuesTables(Map 
skewKeysValuesTables) { + this.skewKeysValuesTables = skewKeysValuesTables; + } + + @Override + public Map> getExprs() { + return exprs; + } + + @Override + public void setExprs(Map> exprs) { + this.exprs = exprs; + } + + @Override + public Map> getFilters() { + return filters; + } + + @Override + public void setFilters(Map> filters) { + this.filters = filters; + } + + @Override + public List getOutputColumnNames() { + return outputColumnNames; + } + + @Override + public void setOutputColumnNames(List outputColumnNames) { + this.outputColumnNames = outputColumnNames; + } + + @Override + public Map getReversedExprs() { + return reversedExprs; + } + + @Override + public void setReversedExprs(Map reversedExprs) { + this.reversedExprs = reversedExprs; + } + + @Override + public boolean isNoOuterJoin() { + return noOuterJoin; + } + + @Override + public void setNoOuterJoin(boolean noOuterJoin) { + this.noOuterJoin = noOuterJoin; + } + + @Override + public JoinCondDesc[] getConds() { + return conds; + } + + @Override + public void setConds(JoinCondDesc[] conds) { + this.conds = conds; + } + + @Override + public Byte[] getTagOrder() { + return tagOrder; + } + + @Override + public void setTagOrder(Byte[] tagOrder) { + this.tagOrder = tagOrder; + } + + @Override + public TableDesc getKeyTableDesc() { + return keyTableDesc; + } + + @Override + public void setKeyTableDesc(TableDesc keyTableDesc) { + this.keyTableDesc = keyTableDesc; + } + + + public Map> getRetainList() { + return retainList; + } + + public void setRetainList(Map> retainList) { + this.retainList = retainList; + } + + /** + * @return the keys + */ + @Explain(displayName = "keys") + public Map> getKeys() { + return keys; + } + + /** + * @param keys + * the keys to set + */ + public void setKeys(Map> keys) { + this.keys = keys; + } + + /** + * @return the position of the big table not in memory + */ + @Explain(displayName = "Position of Big Table") + public int getPosBigTable() { + return posBigTable; + } + + /** + * @param posBigTable + * the position of the big table not in memory + */ + public void setPosBigTable(int posBigTable) { + this.posBigTable = posBigTable; + } + + /** + * @return the keyTblDesc + */ + public TableDesc getKeyTblDesc() { + return keyTblDesc; + } + + /** + * @param keyTblDesc + * the keyTblDesc to set + */ + public void setKeyTblDesc(TableDesc keyTblDesc) { + this.keyTblDesc = keyTblDesc; + } + + /** + * @return the valueTblDescs + */ + public List getValueTblDescs() { + return valueTblDescs; + } + + /** + * @param valueTblDescs + * the valueTblDescs to set + */ + public void setValueTblDescs(List valueTblDescs) { + this.valueTblDescs = valueTblDescs; + } + + /** + * @return bigTableAlias + */ + public String getBigTableAlias() { + return bigTableAlias; + } + + /** + * @param bigTableAlias + */ + public void setBigTableAlias(String bigTableAlias) { + this.bigTableAlias = bigTableAlias; + } + + public LinkedHashMap>> getAliasBucketFileNameMapping() { + return aliasBucketFileNameMapping; + } + + public void setAliasBucketFileNameMapping( + LinkedHashMap>> aliasBucketFileNameMapping) { + this.aliasBucketFileNameMapping = aliasBucketFileNameMapping; + } + + public LinkedHashMap getBucketFileNameMapping() { + return bucketFileNameMapping; + } + + public void setBucketFileNameMapping(LinkedHashMap bucketFileNameMapping) { + this.bucketFileNameMapping = bucketFileNameMapping; + } +} Property changes on: ql/src/java/org/apache/hadoop/hive/ql/plan/JDBMSinkDesc.java 
___________________________________________________________________ Added: svn:executable + * Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapJoinDesc.java (working copy) @@ -25,14 +25,14 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; +import java.util.Set; import java.util.Map.Entry; -import java.util.Set; /** * Map Join operator Descriptor implementation. * */ -@Explain(displayName = "Common Join Operator") +@Explain(displayName = "Map Join Operator") public class MapJoinDesc extends JoinDesc implements Serializable { private static final long serialVersionUID = 1L; Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredLocalWork.java (working copy) @@ -20,15 +20,15 @@ import java.io.File; import java.io.Serializable; -import java.net.URI; import java.util.ArrayList; import java.util.Iterator; import java.util.LinkedHashMap; +import java.util.List; import java.util.Map.Entry; +import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.ql.exec.BucketMatcher; import org.apache.hadoop.hive.ql.exec.Operator; -import org.apache.hadoop.fs.Path; /** * MapredLocalWork. @@ -42,8 +42,13 @@ private LinkedHashMap aliasToFetchWork; private boolean inputFileChangeSensitive; private BucketMapJoinContext bucketMapjoinContext; + private String tmpFileURI; + + + private List> dummyParentOp ; public MapredLocalWork() { + } public MapredLocalWork( @@ -51,13 +56,32 @@ final LinkedHashMap aliasToFetchWork) { this.aliasToWork = aliasToWork; this.aliasToFetchWork = aliasToFetchWork; + } + public MapredLocalWork(MapredLocalWork clone){ + this.tmpFileURI = clone.tmpFileURI; + this.inputFileChangeSensitive=clone.inputFileChangeSensitive; + + } + + + public void setDummyParentOp(List> op){ + this.dummyParentOp=op; + } + + + public List> getDummyParentOp(){ + return this.dummyParentOp; + } + + @Explain(displayName = "Alias -> Map Local Operator Tree") public LinkedHashMap> getAliasToWork() { return aliasToWork; } + public void setAliasToWork( final LinkedHashMap> aliasToWork) { this.aliasToWork = aliasToWork; @@ -88,6 +112,8 @@ this.inputFileChangeSensitive = inputFileChangeSensitive; } + + public void deriveExplainAttributes() { if (bucketMapjoinContext != null) { bucketMapjoinContext.deriveBucketMapJoinMapping(); @@ -110,6 +136,14 @@ this.bucketMapjoinContext = bucketMapjoinContext; } + public void setTmpFileURI(String tmpFileURI) { + this.tmpFileURI = tmpFileURI; + } + + public String getTmpFileURI() { + return tmpFileURI; + } + public static class BucketMapJoinContext implements Serializable { private static final long serialVersionUID = 1L; @@ -198,11 +232,13 @@ this.aliasBucketFileNameMapping = aliasBucketFileNameMapping; } + @Override public String toString() { - if (aliasBucketFileNameMapping != null) + if (aliasBucketFileNameMapping != null) { return "Mapping:" + aliasBucketFileNameMapping.toString(); - else + } else { return ""; + } } @Explain(displayName = "Alias Bucket Base File Name Mapping", normalExplain = false) Index: ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java 
=================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java (working copy) @@ -67,6 +67,8 @@ private String inputformat; private boolean gatheringStats; + private String tmpHDFSFileURI; + public MapredWork() { aliasToPartnInfo = new LinkedHashMap(); } @@ -330,4 +332,12 @@ public boolean isGatheringStats() { return this.gatheringStats; } + + public String getTmpHDFSFileURI() { + return tmpHDFSFileURI; + } + + public void setTmpHDFSFileURI(String tmpHDFSFileURI) { + this.tmpHDFSFileURI = tmpHDFSFileURI; + } } Index: ql/src/java/org/apache/hadoop/hive/ql/util/JoinUtil.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/JoinUtil.java (revision 0) +++ ql/src/java/org/apache/hadoop/hive/ql/util/JoinUtil.java (revision 0) @@ -0,0 +1,305 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.util; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.Map.Entry; + +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluator; +import org.apache.hadoop.hive.ql.exec.ExprNodeEvaluatorFactory; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.persistence.RowContainer; +import org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; +import org.apache.hadoop.hive.ql.plan.JoinDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.serde2.SerDe; +import org.apache.hadoop.hive.serde2.SerDeException; +import org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils.ObjectInspectorCopyOption; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.BooleanWritable; +import org.apache.hadoop.mapred.SequenceFileInputFormat; +import org.apache.hadoop.util.ReflectionUtils; + +public class JoinUtil { + + public static HashMap> getObjectInspectorsFromEvaluators( + Map> exprEntries, + ObjectInspector[] 
inputObjInspector, + int posBigTableAlias) throws HiveException { + HashMap> result = new HashMap>(); + for (Entry> exprEntry : exprEntries + .entrySet()) { + Byte alias = exprEntry.getKey(); + //get big table + if(alias == (byte) posBigTableAlias){ + //skip the big tables + continue; + } + + List exprList = exprEntry.getValue(); + ArrayList fieldOIList = new ArrayList(); + for (int i = 0; i < exprList.size(); i++) { + fieldOIList.add(exprList.get(i).initialize(inputObjInspector[alias])); + } + result.put(alias, fieldOIList); + } + return result; + } + + + public static HashMap> getStandardObjectInspectors( + Map> aliasToObjectInspectors, + int posBigTableAlias) { + HashMap> result = new HashMap>(); + for (Entry> oiEntry : aliasToObjectInspectors + .entrySet()) { + Byte alias = oiEntry.getKey(); + + //get big table + if(alias == (byte) posBigTableAlias ){ + //skip the big tables + continue; + } + + List oiList = oiEntry.getValue(); + ArrayList fieldOIList = new ArrayList( + oiList.size()); + for (int i = 0; i < oiList.size(); i++) { + fieldOIList.add(ObjectInspectorUtils.getStandardObjectInspector(oiList + .get(i), ObjectInspectorCopyOption.WRITABLE)); + } + result.put(alias, fieldOIList); + } + return result; + + } + public static int populateJoinKeyValue(Map> outMap, + Map> inputMap, + Byte[] order, + int posBigTableAlias) { + + int total = 0; + + Iterator>> entryIter = inputMap + .entrySet().iterator(); + while (entryIter.hasNext()) { + Map.Entry> e = entryIter.next(); + Byte key = order[e.getKey()]; + + List valueFields = new ArrayList(); + + List expr = e.getValue(); + int sz = expr.size(); + total += sz; + + for (int j = 0; j < sz; j++) { + if(key == (byte) posBigTableAlias){ + valueFields.add(null); + }else{ + valueFields.add(ExprNodeEvaluatorFactory.get(expr.get(j))); + } + } + + outMap.put(key, valueFields); + } + + return total; + } + + + /** + * Return the key as a standard object. StandardObject can be inspected by a + * standard ObjectInspector. + */ + public static ArrayList computeKeys(Object row, + List keyFields, List keyFieldsOI) + throws HiveException { + + // Compute the keys + ArrayList nr = new ArrayList(keyFields.size()); + for (int i = 0; i < keyFields.size(); i++) { + + nr.add(ObjectInspectorUtils.copyToStandardObject(keyFields.get(i) + .evaluate(row), keyFieldsOI.get(i), + ObjectInspectorCopyOption.WRITABLE)); + } + + return nr; + } + + /** + * Return the value as a standard object. StandardObject can be inspected by a + * standard ObjectInspector. + */ + public static ArrayList computeValues(Object row, + List valueFields, List valueFieldsOI, + List filters, List filtersOI, + boolean noOuterJoin) throws HiveException { + + // Compute the values + ArrayList nr = new ArrayList(valueFields.size()); + for (int i = 0; i < valueFields.size(); i++) { + nr.add(ObjectInspectorUtils.copyToStandardObject(valueFields.get(i) + .evaluate(row), valueFieldsOI.get(i), + ObjectInspectorCopyOption.WRITABLE)); + } + if (!noOuterJoin) { + // add whether the row is filtered or not. + nr.add(new BooleanWritable(isFiltered(row, filters, filtersOI))); + } + + return nr; + } + /** + * Returns true if the row does not pass through filters. + */ + protected static Boolean isFiltered(Object row, + List filters, List ois) + throws HiveException { + // apply join filters on the row. 
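+ // a row counts as filtered as soon as any filter expression evaluates + // to null or to false.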
+ Boolean ret = false; + for (int j = 0; j < filters.size(); j++) { + Object condition = filters.get(j).evaluate(row); + ret = (Boolean) ((PrimitiveObjectInspector) + ois.get(j)).getPrimitiveJavaObject(condition); + if (ret == null || !ret) { + return true; + } + } + return false; + } + + public static TableDesc getSpillTableDesc(Byte alias, + Map spillTableDesc,JoinDesc conf, + boolean noOuterJoin) { + if (spillTableDesc == null || spillTableDesc.size() == 0) { + spillTableDesc = initSpillTables(conf,noOuterJoin); + } + return spillTableDesc.get(alias); + } + + public static Map getSpillTableDesc( + Map spillTableDesc,JoinDesc conf, + boolean noOuterJoin) { + if (spillTableDesc == null) { + spillTableDesc = initSpillTables(conf,noOuterJoin); + } + return spillTableDesc; + } + + public static SerDe getSpillSerDe(byte alias, + Map spillTableDesc,JoinDesc conf, + boolean noOuterJoin) { + TableDesc desc = getSpillTableDesc(alias,spillTableDesc,conf, noOuterJoin); + if (desc == null) { + return null; + } + SerDe sd = (SerDe) ReflectionUtils.newInstance(desc.getDeserializerClass(), + null); + try { + sd.initialize(null, desc.getProperties()); + } catch (SerDeException e) { + e.printStackTrace(); + return null; + } + return sd; + } + + public static Map initSpillTables(JoinDesc conf,boolean noOuterJoin) { + Map> exprs = conf.getExprs(); + Map spillTableDesc = new HashMap(exprs.size()); + for (int tag = 0; tag < exprs.size(); tag++) { + List valueCols = exprs.get((byte) tag); + int columnSize = valueCols.size(); + StringBuilder colNames = new StringBuilder(); + StringBuilder colTypes = new StringBuilder(); + if (columnSize <= 0) { + continue; + } + for (int k = 0; k < columnSize; k++) { + String newColName = tag + "_VALUE_" + k; // any name, it does not + // matter. 
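+ // accumulate comma-separated column name and type lists that define + // the spill table schema for this tag.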
+ colNames.append(newColName); + colNames.append(','); + colTypes.append(valueCols.get(k).getTypeString()); + colTypes.append(','); + } + if (!noOuterJoin) { + colNames.append("filtered"); + colNames.append(','); + colTypes.append(TypeInfoFactory.booleanTypeInfo.getTypeName()); + colTypes.append(','); + } + // remove the last ',' + colNames.setLength(colNames.length() - 1); + colTypes.setLength(colTypes.length() - 1); + TableDesc tblDesc = new TableDesc(LazyBinarySerDe.class, + SequenceFileInputFormat.class, HiveSequenceFileOutputFormat.class, + Utilities.makeProperties( + org.apache.hadoop.hive.serde.Constants.SERIALIZATION_FORMAT, "" + + Utilities.ctrlaCode, + org.apache.hadoop.hive.serde.Constants.LIST_COLUMNS, colNames + .toString(), + org.apache.hadoop.hive.serde.Constants.LIST_COLUMN_TYPES, + colTypes.toString())); + spillTableDesc.put((byte) tag, tblDesc); + } + return spillTableDesc; + } + + + public static RowContainer getRowContainer(Configuration hconf, + List structFieldObjectInspectors, + Byte alias,int containerSize, Map spillTableDesc, + JoinDesc conf,boolean noOuterJoin) throws HiveException { + + TableDesc tblDesc = JoinUtil.getSpillTableDesc(alias,spillTableDesc,conf, noOuterJoin); + SerDe serde = JoinUtil.getSpillSerDe(alias,spillTableDesc,conf,noOuterJoin); + + if (serde == null) { + containerSize = 1; + } + + RowContainer rc = new RowContainer(containerSize, hconf); + StructObjectInspector rcOI = null; + if (tblDesc != null) { + // arbitrary column names used internally for serializing to spill table + List colNames = Utilities.getColumnNames(tblDesc.getProperties()); + // object inspector for serializing input tuples + rcOI = ObjectInspectorFactory.getStandardStructObjectInspector(colNames, + structFieldObjectInspectors); + } + + rc.setSerDe(serde, rcOI); + rc.setTableDesc(tblDesc); + return rc; + } +} Property changes on: ql/src/java/org/apache/hadoop/hive/ql/util/JoinUtil.java ___________________________________________________________________ Added: svn:executable + * Index: ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java =================================================================== --- ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java (revision 1023076) +++ ql/src/java/org/apache/hadoop/hive/ql/util/jdbm/htree/HashDirectory.java (working copy) @@ -77,7 +77,7 @@ /** * Hashtable directory page. - * + * * @author Alex Boisvert * @version $Id: HashDirectory.java,v 1.5 2005/06/25 23:12:32 doomdark Exp $ */ @@ -87,24 +87,24 @@ /** * Maximum number of children in a directory. - * + * * (Must be a power of 2 -- if you update this value, you must also update * BIT_SIZE and MAX_DEPTH.) */ - static final int MAX_CHILDREN = 256; + static final int MAX_CHILDREN = 65536; /** * Number of significant bits per directory level. */ - static final int BIT_SIZE = 8; // log2(256) = 8 + static final int BIT_SIZE = 16; // log2(65536) = 16 /** * Maximum number of levels (zero-based) - * + * * (4 * 8 bits = 32 bits, which is the size of an "int", and as you know, * hashcodes in Java are "ints") */ - static final int MAX_DEPTH = 3; // 4 levels + static final int MAX_DEPTH = 64; // upper bound on zero-based directory depth /** * Record ids of children pages. @@ -135,7 +135,7 @@ /** * Construct a HashDirectory - * + * * @param depth * Depth of this directory page. */ @@ -147,7 +147,7 @@ /** * Sets persistence context. This method must be called before any * persistence-related operation.
- * + * * @param recman * RecordManager which stores this directory * @param recid @@ -181,7 +181,7 @@ /** * Returns the value which is associated with the given key. Returns * null if there is not association for this key. - * + * * @param key * key whose associated value is to be returned */ @@ -210,7 +210,7 @@ /** * Associates the specified value with the specified key. - * + * * @param key * key with which the specified value is to be assocated. * @param value @@ -288,7 +288,7 @@ /** * Remove the value which is associated with the given key. If the key does * not exist, this method simply ignores the operation. - * + * * @param key * key whose associated value is to be removed * @return object which was associated with the given key, or @@ -437,7 +437,7 @@ /** * Construct an iterator on this directory. - * + * * @param iterateKeys * True if iteration supplies keys, False if iterateKeys supplies * values. @@ -475,7 +475,7 @@ /** * Prepare internal state so we can answer hasMoreElements - * + * * Actually, this code prepares an Enumeration on the next Bucket to * enumerate. If no following bucket is found, the next Enumeration is set * to null. Index: ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java =================================================================== --- ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (revision 1023076) +++ ql/src/test/org/apache/hadoop/hive/ql/QTestUtil.java (working copy) @@ -28,7 +28,6 @@ import java.io.FileWriter; import java.io.PrintStream; import java.io.Serializable; -import java.net.URI; import java.util.ArrayList; import java.util.Arrays; import java.util.Deque; @@ -40,11 +39,13 @@ import java.util.TreeMap; import java.util.regex.Matcher; import java.util.regex.Pattern; + import junit.framework.Test; import org.apache.hadoop.fs.FileStatus; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hbase.MiniZooKeeperCluster; import org.apache.hadoop.hive.cli.CliDriver; import org.apache.hadoop.hive.cli.CliSessionState; import org.apache.hadoop.hive.conf.HiveConf; @@ -54,6 +55,7 @@ import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.exec.Utilities.StreamPrinter; import org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat; +import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager; import org.apache.hadoop.hive.ql.metadata.Hive; import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.parse.ASTNode; @@ -72,9 +74,7 @@ import org.apache.hadoop.mapred.SequenceFileOutputFormat; import org.apache.hadoop.mapred.TextInputFormat; import org.apache.thrift.protocol.TBinaryProtocol; -import org.apache.hadoop.hbase.MiniZooKeeperCluster; import org.apache.zookeeper.ZooKeeper; -import org.apache.hadoop.hive.ql.lockmgr.zookeeper.ZooKeeperHiveLockManager; /** * QTestUtil. 
@@ -83,9 +83,10 @@ public class QTestUtil { private String testWarehouse; - private final String tmpdir = System.getProperty("test.tmp.dir"); + private final String tmpdir= System.getProperty("test.tmp.dir") ; private final Path tmppath = new Path(tmpdir); + private final String testFiles; private final String outDir; private final String logDir; @@ -203,7 +204,7 @@ // set fs.default.name to the uri of mini-dfs conf.setVar(HiveConf.ConfVars.HADOOPFS, dfs.getFileSystem().getUri().toString()); // hive.metastore.warehouse.dir needs to be set relative to the mini-dfs - conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, + conf.setVar(HiveConf.ConfVars.METASTOREWAREHOUSE, (new Path(dfs.getFileSystem().getUri().toString(), "/build/ql/test/data/warehouse/")).toString()); conf.setVar(HiveConf.ConfVars.HADOOPJT, "localhost:" + mr.getJobTrackerPort()); Index: ql/src/test/queries/clientpositive/join39.q =================================================================== --- ql/src/test/queries/clientpositive/join39.q (revision 1023076) +++ ql/src/test/queries/clientpositive/join39.q (working copy) @@ -4,6 +4,11 @@ CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE; +explain +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key); + INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value Index: ql/src/test/results/clientpositive/bucketmapjoin1.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin1.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin1.q.out (working copy) @@ -84,7 +84,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL b) ds) "2008-04-08")))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -92,6 +93,43 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5, _col6 + Position of Big Table: 0 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket21.txt=[srcbucket21.txt, srcbucket23.txt]} + Alias Bucket File Name Mapping: + b {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -139,9 +177,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -151,12 +189,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504734 + transient_lastDdlTime 
1287175513 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -164,96 +202,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} {ds} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col5, _col6 - Filter Operator - isSamplingPred: false - predicate: - expr: (_col6 = '2008-04-08') - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504734 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - b {srcbucket20.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket21.txt=[srcbucket21.txt, srcbucket23.txt]} - Alias Bucket File Name Mapping: - b {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 - 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -265,12 +218,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504727 + transient_lastDdlTime 1287175506 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -282,12 +235,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504727 + transient_lastDdlTime 1287175506 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -299,14 +252,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -316,28 +269,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location 
pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504734 + transient_lastDdlTime 1287175513 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -348,12 +301,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504734 + transient_lastDdlTime 1287175513 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -361,9 +314,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-52-14_568_8446001704480162238/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-45-14_022_599970964789599988/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -374,12 +327,12 @@ columns.types 
string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504734 + transient_lastDdlTime 1287175513 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -390,12 +343,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504734 + transient_lastDdlTime 1287175513 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -423,11 +376,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-52-26_025_7986849883515222888/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-45-27_792_4337834774600359771/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-52-26_025_7986849883515222888/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-45-27_792_4337834774600359771/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] @@ -476,11 +429,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-52-47_328_3277552806566207374/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-45-49_254_1178511872636435849/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-52-47_328_3277552806566207374/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-45-49_254_1178511872636435849/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -519,14 +472,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-52-57_482_1461425029485724814/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-45-58_457_5791794441739067859/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-52-57_482_1461425029485724814/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-45-58_457_5791794441739067859/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -568,7 +521,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL b) ds) "2008-04-08")))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -576,6 +530,40 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5, _col6 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -628,9 +616,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -640,7 +628,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -649,7 +637,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284504767 + transient_lastDdlTime 1287175549 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -657,97 +645,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} {ds} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col5, _col6 - Filter Operator - isSamplingPred: false - predicate: - expr: (_col6 = '2008-04-08') - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - numFiles 1 - numPartitions 0 - numRows 464 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 - transient_lastDdlTime 1284504767 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - a {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -761,13 +663,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504727 + transient_lastDdlTime 1287175507 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -779,13 +681,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504727 + transient_lastDdlTime 1287175507 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -797,14 +699,14 @@ Move Operator files: hdfs directory: true - source: 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -814,7 +716,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -823,23 +725,23 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284504767 + transient_lastDdlTime 1287175549 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -850,7 +752,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -859,7 +761,7 @@ serialization.format 1 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284504767 + transient_lastDdlTime 1287175549 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -867,9 +769,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-53-02_338_1389292184977084722/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-46-03_429_901254008145320602/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -880,7 +782,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -889,7 +791,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284504767 + transient_lastDdlTime 1287175549 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -900,7 +802,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -909,7 +811,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284504767 + transient_lastDdlTime 1287175549 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -949,11 +851,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-53-15_338_4031538421928596147/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-46-17_526_3445835636731954548/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-53-15_338_4031538421928596147/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-46-17_526_3445835636731954548/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, 
comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -1038,11 +940,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-53-39_709_5928500906509440912/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-46-38_542_1059502483607510791/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-53-39_709_5928500906509440912/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-46-38_542_1059502483607510791/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] @@ -1105,14 +1007,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-53-49_922_7370633857792864325/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-46-47_829_8437087111818030853/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-53-49_922_7370633857792864325/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-46-47_829_8437087111818030853/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] Index: ql/src/test/results/clientpositive/bucketmapjoin2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin2.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin2.q.out (working copy) @@ -84,7 +84,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -92,6 +93,48 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 0 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt]} + Alias Bucket File Name Mapping: + b {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -132,9 +175,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,12 +187,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504846 + transient_lastDdlTime 1287175622 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result 
TotalFiles: 1 @@ -157,94 +200,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504846 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - b {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt]} - Alias Bucket File Name Mapping: - b {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -256,12 +216,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504838 + transient_lastDdlTime 1287175615 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -273,12 +233,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504838 + transient_lastDdlTime 1287175615 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -290,14 +250,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -307,28 +267,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504846 + transient_lastDdlTime 1287175622 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -339,12 +299,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504846 + transient_lastDdlTime 1287175622 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -352,9 +312,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-06_179_6521652216467482451/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-02_796_1030871139150126578/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -365,12 +325,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name 
bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504846 + transient_lastDdlTime 1287175622 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -381,12 +341,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504846 + transient_lastDdlTime 1287175622 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -414,11 +374,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-54-17_966_8697223797090431206/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-47-15_438_6198360107600383327/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-54-17_966_8697223797090431206/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-47-15_438_6198360107600383327/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part_2)b.FieldSchema(name:value, type:string, comment:null), ] @@ -467,11 +427,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-54-39_374_5776763971385970443/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-47-36_353_7045805169561363831/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-54-39_374_5776763971385970443/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-47-36_353_7045805169561363831/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -510,14 +470,14 @@ PREHOOK: type: QUERY PREHOOK: Input: 
default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-54-49_838_4256098155623662521/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-47-45_796_3892887570223971170/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-54-49_838_4256098155623662521/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-47-45_796_3892887570223971170/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -559,7 +519,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -567,6 +528,38 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -617,9 +610,9 @@ File Output Operator compressed: false GlobalTableId: 1 - 
directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -629,7 +622,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -638,7 +631,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 0 - transient_lastDdlTime 1284504879 + transient_lastDdlTime 1287175656 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -646,88 +639,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - numFiles 1 - numPartitions 0 - numRows 0 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 0 - transient_lastDdlTime 1284504879 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: 
bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - a {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -741,13 +657,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504843 + transient_lastDdlTime 1287175620 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -759,13 +675,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504843 + transient_lastDdlTime 1287175620 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -777,14 +693,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002 - destination: 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -794,7 +710,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -803,23 +719,23 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 0 - transient_lastDdlTime 1284504879 + transient_lastDdlTime 1287175656 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -830,7 +746,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -839,7 +755,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 0 - transient_lastDdlTime 1284504879 + transient_lastDdlTime 
1287175656 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -847,9 +763,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-54-55_054_5147409437043781660/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-47-49_830_6797075297816313305/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -860,7 +776,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -869,7 +785,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 0 - transient_lastDdlTime 1284504879 + transient_lastDdlTime 1287175656 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -880,7 +796,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -889,7 +805,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 0 - transient_lastDdlTime 1284504879 + transient_lastDdlTime 1287175656 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -929,11 +845,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-55-07_203_4137128469733058276/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-48-02_385_5364647147417692835/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-55-07_203_4137128469733058276/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-48-02_385_5364647147417692835/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -1018,11 +934,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-55-27_916_6426220424044343424/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-48-23_250_699466185742016272/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-55-27_916_6426220424044343424/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-48-23_250_699466185742016272/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] @@ -1085,14 +1001,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-55-37_835_5727221086898908041/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-48-32_595_7129319940362934902/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-55-37_835_5727221086898908041/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-48-32_595_7129319940362934902/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] Index: ql/src/test/results/clientpositive/bucketmapjoin3.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin3.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin3.q.out (working copy) @@ -84,7 +84,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin_part_2 a) (TOK_TABREF srcbucket_mapjoin_part b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -92,6 +93,48 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col6 + Position of Big Table: 0 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket22.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket23.txt=[srcbucket21.txt, srcbucket23.txt]} + Alias Bucket File Name Mapping: + b {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -142,9 +185,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -154,12 +197,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result 
serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504953 + transient_lastDdlTime 1287175727 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -167,94 +210,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504953 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - b {srcbucket22.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket23.txt=[srcbucket21.txt, srcbucket23.txt]} - Alias Bucket File Name Mapping: - b {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [a] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -268,13 +228,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504951 + transient_lastDdlTime 1287175724 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -286,13 +246,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504951 + transient_lastDdlTime 1287175724 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -304,14 +264,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10000 + source: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -321,28 +281,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504953 + transient_lastDdlTime 1287175727 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -353,12 +313,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504953 + transient_lastDdlTime 1287175727 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -366,9 +326,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002] + 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-55-53_625_5848530933180579779/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-48-47_135_3137244231388558119/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -379,12 +339,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504953 + transient_lastDdlTime 1287175727 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -395,12 +355,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504953 + transient_lastDdlTime 1287175727 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -428,11 +388,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-06_599_7079861620765207719/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-00_540_8008608079574673286/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-06_599_7079861620765207719/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-00_540_8008608079574673286/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin_part_2)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] @@ -481,11 +441,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-28_712_8879884730120469839/-mr-10000 +PREHOOK: 
Output: file:/tmp/liyintang/hive_2010-10-15_13-49-23_110_5294238609208878803/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-28_712_8879884730120469839/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-23_110_5294238609208878803/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -524,14 +484,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-38_946_2041800544454288745/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-32_539_1810286211446872017/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-38_946_2041800544454288745/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-32_539_1810286211446872017/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -573,7 +533,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin_part_2 a) (TOK_TABREF srcbucket_mapjoin_part b) (and (and (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)) (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08")) (= (. (TOK_TABLE_OR_COL a) ds) "2008-04-08")))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -581,6 +542,50 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col6 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt], srcbucket22.txt=[srcbucket22.txt], srcbucket23.txt=[srcbucket23.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -631,9 +636,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -643,7 +648,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -652,7 +657,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11067 - transient_lastDdlTime 1284504988 + transient_lastDdlTime 1287175763 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -660,100 +665,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col6 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col6 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - numFiles 1 - numPartitions 0 - numRows 564 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 11067 - transient_lastDdlTime 1284504988 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket20.txt=[srcbucket22.txt], srcbucket21.txt=[srcbucket23.txt], srcbucket22.txt=[srcbucket22.txt], srcbucket23.txt=[srcbucket23.txt]} - Alias Bucket File Name Mapping: - a 
{pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -767,13 +683,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504946 + transient_lastDdlTime 1287175720 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -785,13 +701,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504946 + transient_lastDdlTime 1287175720 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -803,14 +719,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -820,7 +736,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -829,23 +745,23 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11067 - transient_lastDdlTime 1284504988 + transient_lastDdlTime 1287175763 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -856,7 +772,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -865,7 +781,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11067 - transient_lastDdlTime 1284504988 + transient_lastDdlTime 1287175763 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -873,9 +789,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-56-43_857_4954976106545022146/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-49-36_540_8127189479772806338/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -886,7 +802,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -895,7 +811,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11067 - transient_lastDdlTime 1284504988 + transient_lastDdlTime 1287175763 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -906,7 +822,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -915,7 +831,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11067 - transient_lastDdlTime 1284504988 + transient_lastDdlTime 1287175763 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -955,11 +871,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-57_421_9111351844773615576/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-51_691_7528584846455262099/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: 
Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-56-57_421_9111351844773615576/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-49-51_691_7528584846455262099/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -1044,11 +960,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-57-21_276_8220084583053039711/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-50-13_912_6674337133506539817/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-57-21_276_8220084583053039711/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-50-13_912_6674337133506539817/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -1111,14 +1027,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-57-32_158_2712075681251012335/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-50-23_205_7592925947270849729/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-57-32_158_2712075681251012335/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-50-23_205_7592925947270849729/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] Index: ql/src/test/results/clientpositive/bucketmapjoin4.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin4.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin4.q.out (working copy) 
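The hunks above restructure the bucketmapjoin3.q.out plan: the map-join build side, which previously sat inline in the Map Reduce stage's Local Work, is hoisted into a new root stage (Stage-7, Map Reduce Local Work) that Stage-1 now depends on, so the small alias is scanned, filtered on ds = '2008-04-08', and hashed before the big table is streamed. Below is a minimal sketch of that build-then-probe pattern; the Row class, the data, and the method names are hypothetical stand-ins for illustration, not Hive APIs.

    // Minimal, hypothetical sketch of the build-then-probe pattern that a
    // "Map Reduce Local Work" stage performs ahead of the main map stage:
    // scan the small side once, hash its rows by join key, then probe that
    // map while streaming the big side. Inner-join semantics, as in the plan.
    import java.util.ArrayList;
    import java.util.Arrays;
    import java.util.HashMap;
    import java.util.List;
    import java.util.Map;

    public class LocalWorkSketch {

      // A row is just a list of column values in this sketch.
      static class Row {
        final List<Object> columns;
        Row(Object... cols) { this.columns = Arrays.asList(cols); }
        Object key() { return columns.get(0); }        // join key: [Column[key]]
        public String toString() { return columns.toString(); }
      }

      public static void main(String[] args) {
        // "Stage-7": build the hash table from the small (local) side.
        List<Row> smallSide = Arrays.asList(new Row(1, "v1"), new Row(2, "v2"));
        Map<Object, List<Row>> hashTable = new HashMap<>();
        for (Row r : smallSide) {
          hashTable.computeIfAbsent(r.key(), k -> new ArrayList<>()).add(r);
        }

        // "Stage-1": stream the big side and probe the prebuilt table.
        List<Row> bigSide = Arrays.asList(new Row(1, "a"), new Row(3, "b"));
        for (Row big : bigSide) {
          List<Row> matches = hashTable.get(big.key());
          if (matches == null) continue;               // no match: drop (inner join)
          for (Row small : matches) {
            System.out.println(big + " x " + small);   // joined output row
          }
        }
      }
    }

The same stage split appears in the bucketmapjoin4.q.out hunks that follow, with the unbucketed filter operators absent because that test joins unpartitioned bucketed tables.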
@@ -84,7 +84,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -92,6 +93,38 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 0 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + b {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + b {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -132,9 +165,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -144,12 +177,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505068 + transient_lastDdlTime 1287175837 serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -157,84 +190,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505068 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - b {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - b {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: 
org.apache.hadoop.mapred.TextInputFormat @@ -246,12 +206,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505061 + transient_lastDdlTime 1287175830 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -263,12 +223,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505061 + transient_lastDdlTime 1287175830 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -280,14 +240,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -297,28 +257,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505068 + transient_lastDdlTime 1287175837 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10001 + tmp directory: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -329,12 +289,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505068 + transient_lastDdlTime 1287175837 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -342,9 +302,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-57-48_424_1970489996852752921/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-50-37_713_8397106184600203842/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -355,12 +315,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505068 + 
transient_lastDdlTime 1287175837 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -371,12 +331,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505068 + transient_lastDdlTime 1287175837 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -402,11 +362,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-57-59_596_661055811113352643/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-50-50_080_3250358574001886106/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-57-59_596_661055811113352643/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-50-50_080_3250358574001886106/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin)b.FieldSchema(name:value, type:string, comment:null), ] @@ -453,11 +413,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-58-20_532_4131677837767246501/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-12_750_5190209184978381690/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-58-20_532_4131677837767246501/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-12_750_5190209184978381690/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -496,14 +456,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-58-31_183_5909440248209423381/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-22_308_4772430048114309901/-mr-10000 POSTHOOK: 
query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-58-31_183_5909440248209423381/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-22_308_4772430048114309901/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -545,7 +505,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -553,6 +514,38 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -593,9 +586,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -605,7 +598,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -614,7 +607,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284505100 + transient_lastDdlTime 1287175872 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -622,88 +615,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - numFiles 1 - numPartitions 0 - numRows 464 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 - transient_lastDdlTime 1284505100 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - a 
{pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -715,12 +631,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505061 + transient_lastDdlTime 1287175830 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -732,12 +648,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505061 + transient_lastDdlTime 1287175830 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -749,14 +665,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10000 + source: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -766,7 +682,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -775,23 +691,23 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284505100 + transient_lastDdlTime 1287175872 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -802,7 +718,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -811,7 +727,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284505100 + transient_lastDdlTime 1287175872 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -819,9 +735,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10002 
[pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-58-36_141_5221440347310438999/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-51-26_294_4777526524357803096/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -832,7 +748,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -841,7 +757,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284505100 + transient_lastDdlTime 1287175872 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -852,7 +768,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -861,7 +777,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284505100 + transient_lastDdlTime 1287175872 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -899,11 +815,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-58-48_113_4754411707278279463/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-38_519_5227176789157682406/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-58-48_113_4754411707278279463/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-38_519_5227176789157682406/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -986,11 +902,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-59-09_083_1925925122457264175/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-59_233_5568038317944335995/-mr-10000 
POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-59-09_083_1925925122457264175/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-51-59_233_5568038317944335995/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] @@ -1053,14 +969,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-59-20_133_505629120542659538/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-52-08_602_2192616005048999053/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_15-59-20_133_505629120542659538/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-52-08_602_2192616005048999053/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] Index: ql/src/test/results/clientpositive/bucketmapjoin5.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin5.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin5.q.out (working copy) @@ -114,7 +114,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -122,6 +123,44 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 + 
pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt 1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt 2 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt 3 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -162,9 +201,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -174,12 +213,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505182 + transient_lastDdlTime 1287175950 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -187,91 +226,12 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types 
string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505182 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket20.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket21.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - a {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 - 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket21.txt 1 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket22.txt 2 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09/srcbucket23.txt 3 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -285,13 +245,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505168 + transient_lastDdlTime 1287175936 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -303,17 +263,17 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505168 + transient_lastDdlTime 1287175936 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-09 Partition base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat @@ -327,13 +287,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505168 + transient_lastDdlTime 1287175936 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -345,13 +305,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505168 + transient_lastDdlTime 1287175936 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -363,14 +323,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -380,28 +340,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505182 + transient_lastDdlTime 1287175950 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10001 + tmp directory: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -412,12 +372,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505182 + transient_lastDdlTime 1287175950 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -425,9 +385,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_15-59-42_577_2707452756229867945/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-52-30_141_2228998998086893673/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -438,12 +398,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505182 + 
transient_lastDdlTime 1287175950 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -454,12 +414,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505182 + transient_lastDdlTime 1287175950 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -489,11 +449,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-00_607_4363184705399797060/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-52-50_870_6006343170250603326/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-00_607_4363184705399797060/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-52-50_870_6006343170250603326/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] @@ -544,11 +504,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-27_785_1420877540768328899/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-53-15_775_7748215988331220754/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-27_785_1420877540768328899/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-53-15_775_7748215988331220754/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -587,14 +547,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-38_053_810358526331057404/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-53-26_222_4483260565055903691/-mr-10000 
POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-38_053_810358526331057404/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-53-26_222_4483260565055903691/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -636,7 +596,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -644,6 +605,40 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0 + 
pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -684,9 +679,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -696,7 +691,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -705,7 +700,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17966 - transient_lastDdlTime 1284505227 + transient_lastDdlTime 1287175995 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -713,91 +708,12 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat 
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - numFiles 1 - numPartitions 0 - numRows 928 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 17966 - transient_lastDdlTime 1284505227 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt], ds=2008-04-09/srcbucket22.txt=[srcbucket20.txt], ds=2008-04-09/srcbucket23.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - a {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket22.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08/srcbucket23.txt 1 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket22.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09/srcbucket23.txt 1 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -811,13 +727,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location 
pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505177 + transient_lastDdlTime 1287175945 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -829,17 +745,17 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505177 + transient_lastDdlTime 1287175945 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2/ds=2008-04-09 Partition base file name: ds=2008-04-09 input format: org.apache.hadoop.mapred.TextInputFormat @@ -853,13 +769,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505177 + transient_lastDdlTime 1287175945 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -871,13 +787,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part_2 name srcbucket_mapjoin_part_2 partition_columns ds serialization.ddl struct srcbucket_mapjoin_part_2 { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505177 + transient_lastDdlTime 1287175945 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part_2 name: srcbucket_mapjoin_part_2 @@ -889,14 +805,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10000 + 
source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -906,7 +822,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -915,23 +831,23 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17966 - transient_lastDdlTime 1284505227 + transient_lastDdlTime 1287175995 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -942,7 +858,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -951,7 +867,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17966 - transient_lastDdlTime 1284505227 + transient_lastDdlTime 1287175995 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ 
-959,9 +875,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-00-43_036_8977672461532956091/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-53-30_282_2945779120241269950/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -972,7 +888,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -981,7 +897,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17966 - transient_lastDdlTime 1284505227 + transient_lastDdlTime 1287175995 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -992,7 +908,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -1001,7 +917,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17966 - transient_lastDdlTime 1284505227 + transient_lastDdlTime 1287175995 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -1043,11 +959,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-56_352_1173075773351651295/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-53-44_102_8640488366520187814/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-00-56_352_1173075773351651295/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-53-44_102_8640488366520187814/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION 
[(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -1134,11 +1050,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-01-19_467_5081261362458339468/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-54-06_228_7401219913133286014/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-01-19_467_5081261362458339468/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-54-06_228_7401219913133286014/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] @@ -1201,14 +1117,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-01-29_267_3333079687141827583/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-54-15_709_5668417992730114788/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-01-29_267_3333079687141827583/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_13-54-15_709_5668417992730114788/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] Index: ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative.q.out (working copy) @@ -54,7 +54,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL b) ds) "2008-04-08")))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -62,6 +63,35 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: (ds = '2008-04-08') + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5, _col6 + Position of Big Table: 0 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -109,9 +139,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -121,12 +151,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505301 + transient_lastDdlTime 1287176067 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -134,88 +164,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: (ds = '2008-04-08') - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} {ds} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col5, _col6 - Filter Operator - isSamplingPred: false - predicate: - expr: (_col6 = '2008-04-08') - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, 
_col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505301 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -227,12 +180,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505298 + transient_lastDdlTime 1287176063 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -244,12 +197,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505298 + transient_lastDdlTime 1287176063 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -261,14 +214,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002 - destination: 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -278,28 +231,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505301 + transient_lastDdlTime 1287176067 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -310,12 +263,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505301 + transient_lastDdlTime 1287176067 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -323,9 +276,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-41_683_3418559117121417207/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-27_070_1981489966915749364/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -336,12 +289,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505301 + transient_lastDdlTime 1287176067 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -352,12 +305,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505301 + transient_lastDdlTime 1287176067 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result Index: ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out =================================================================== --- ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/bucketmapjoin_negative2.q.out (working copy) @@ -59,7 +59,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part_2 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL b) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -67,6 +68,30 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + b + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + b + TableScan + alias: b + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 0 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -107,9 +132,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -119,12 +144,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505310 + transient_lastDdlTime 1287176075 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -132,76 +157,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - b - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - b - TableScan - alias: b - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505310 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [a] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin Partition base file name: srcbucket_mapjoin input format: org.apache.hadoop.mapred.TextInputFormat @@ -213,12 +173,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505305 + transient_lastDdlTime 1287176070 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -230,12 +190,12 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin name srcbucket_mapjoin serialization.ddl struct srcbucket_mapjoin { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505305 + transient_lastDdlTime 1287176070 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin name: srcbucket_mapjoin @@ -247,14 +207,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10002 + destination: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -264,28 +224,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505310 + transient_lastDdlTime 1287176075 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -296,12 +256,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505310 + transient_lastDdlTime 1287176075 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -309,9 +269,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-01-50_250_8645820883627828082/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_13-54-35_414_8962621251239618733/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -322,12 +282,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505310 + transient_lastDdlTime 1287176075 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -338,12 +298,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284505310 + transient_lastDdlTime 1287176075 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result Index: ql/src/test/results/clientpositive/join25.q.out =================================================================== --- ql/src/test/results/clientpositive/join25.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join25.q.out (working copy) @@ -17,7 +17,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -25,6 +26,29 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -80,61 +104,6 @@ name: dest_j1 Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 Stage: Stage-5 Conditional Operator @@ -143,7 +112,7 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-36_073_5985571747516774307/-ext-10000 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-18_284_5431009683335318359/-ext-10000 Stage: Stage-0 Move Operator @@ -161,7 +130,7 @@ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-36_073_5985571747516774307/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-18_284_5431009683335318359/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 @@ -192,11 +161,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-38-43_960_1122058681238312225/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-31-25_895_7020388466103281499/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-38-43_960_1122058681238312225/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-31-25_895_7020388466103281499/-mr-10000 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE 
[(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join26.q.out =================================================================== --- ql/src/test/results/clientpositive/join26.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join26.q.out (working copy) @@ -19,7 +19,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x y))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-8 is a root stage + Stage-1 depends on stages: Stage-8 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -27,6 +28,55 @@ Stage-3 STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col5, _col9 + Position of Big Table: 2 + y + TableScan + alias: y + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col5, _col9 + Position of Big Table: 2 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -85,9 +135,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -97,12 +147,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 + transient_lastDdlTime 1287178290 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 @@ -110,145 +160,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - y - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - outputColumnNames: _col0, _col5, _col9 - Position of Big Table: 2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col5, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col9 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 - name dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - y - TableScan - alias: y - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - outputColumnNames: _col0, _col5, _col9 - Position of Big Table: 2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - expr: _col9 - type: string - outputColumnNames: _col0, _col5, _col9 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col9 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types 
string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 - name dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 Partition base file name: hr=11 input format: org.apache.hadoop.mapred.TextInputFormat @@ -262,13 +178,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart name srcpart partition_columns ds/hr serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504421 + transient_lastDdlTime 1287175206 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -279,13 +195,13 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart name srcpart partition_columns ds/hr serialization.ddl struct srcpart { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504421 + transient_lastDdlTime 1287175206 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcpart name: srcpart @@ -297,14 +213,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10000 table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -314,28 +230,28 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 + transient_lastDdlTime 1287178290 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -346,12 +262,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 + transient_lastDdlTime 1287178290 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 @@ -359,9 +275,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10002] Path -> Partition: - 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-48_342_7099969216988377302/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-30_153_3830217572995616164/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -372,12 +288,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 + transient_lastDdlTime 1287178290 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -388,12 +304,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507528 + transient_lastDdlTime 1287178290 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 name: dest_j1 @@ -423,11 +339,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-38-55_538_7925205774034730305/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-31-39_028_7287646671067109906/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-38-55_538_7925205774034730305/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-31-39_028_7287646671067109906/-mr-10000 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join27.q.out =================================================================== --- ql/src/test/results/clientpositive/join27.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join27.q.out (working copy) @@ -17,7 +17,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL y) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -25,6 +26,29 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[value]] + 1 [Column[value]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -80,61 +104,6 @@ name: dest_j1 Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[value]] - 1 [Column[value]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 Stage: Stage-5 Conditional Operator @@ -143,7 +112,7 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-59_915_4313549870418004995/-ext-10000 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-43_285_3681320153889217550/-ext-10000 Stage: Stage-0 Move Operator @@ -161,7 +130,7 @@ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-38-59_915_4313549870418004995/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-43_285_3681320153889217550/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 @@ -192,11 +161,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key, x.value PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-06_541_7268389461594626671/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-31-50_775_1978397625792737963/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key, x.value POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-06_541_7268389461594626671/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-31-50_775_1978397625792737963/-mr-10000 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: 
dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join28.q.out =================================================================== --- ql/src/test/results/clientpositive/join28.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join28.q.out (working copy) @@ -23,7 +23,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-8 is a root stage + Stage-1 depends on stages: Stage-8 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -31,6 +32,59 @@ Stage-3 STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + subq:x + Fetch Operator + limit: -1 + z + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + z + TableScan + alias: z + Filter Operator + predicate: + expr: ((ds = '2008-04-08') and (hr = 11)) + type: boolean + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + predicate: + expr: (hr = 11) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -95,122 +149,6 @@ name: dest_j1 Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - subq:x - Fetch Operator - limit: -1 - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq:x - TableScan - alias: x - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - 
type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - z - TableScan - alias: z - Filter Operator - predicate: - expr: ((ds = '2008-04-08') and (hr = 11)) - type: boolean - Filter Operator - predicate: - expr: (ds = '2008-04-08') - type: boolean - Filter Operator - predicate: - expr: (hr = 11) - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 Stage: Stage-5 Conditional Operator @@ -219,7 +157,7 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-39-10_819_1600312528863286394/-ext-10000 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-55_041_5240007570355394643/-ext-10000 Stage: Stage-0 Move Operator @@ -237,7 +175,7 @@ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-39-10_819_1600312528863286394/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-31-55_041_5240007570355394643/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 @@ -275,11 +213,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-19_032_8638055362103004417/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-32-03_549_3185919038816513700/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-19_032_8638055362103004417/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-32-03_549_3185919038816513700/-mr-10000 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ] 128 val_128 Index: ql/src/test/results/clientpositive/join29.q.out =================================================================== --- ql/src/test/results/clientpositive/join29.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join29.q.out (working copy) @@ -20,7 +20,8 @@ STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1, Stage-7 + Stage-9 depends on stages: Stage-1, Stage-7 + 
Index: ql/src/test/results/clientpositive/join29.q.out
===================================================================
--- ql/src/test/results/clientpositive/join29.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join29.q.out	(working copy)
@@ -20,7 +20,8 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-7
+  Stage-9 depends on stages: Stage-1, Stage-7
+  Stage-2 depends on stages: Stage-9
   Stage-6 depends on stages: Stage-2 , consists of Stage-5, Stage-4
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4
@@ -85,10 +86,31 @@
               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        file:/tmp/liyintang/hive_2010-10-15_14-32-07_814_3143780899472392520/-mr-10004 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        file:/tmp/liyintang/hive_2010-10-15_14-32-07_814_3143780899472392520/-mr-10004 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 {_col0} {_col1}
+              1 {_col1}
+            handleSkewJoin: false
+            keys:
+              0 [Column[_col0]]
+              1 [Column[_col0]]
+            outputColumnNames: _col0, _col1, _col3
+            Position of Big Table: 1
+
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_16-39-23_363_5550537730462856534/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-15_14-32-07_814_3143780899472392520/-mr-10002 
           Common Join Operator
             condition map:
                  Inner Join 0 to 1
@@ -138,59 +160,6 @@
                       name: dest_j1
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            file:/tmp/nzhang/hive_2010-09-14_16-39-23_363_5550537730462856534/-mr-10004 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            file:/tmp/nzhang/hive_2010-09-14_16-39-23_363_5550537730462856534/-mr-10004 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {_col0} {_col1}
-                  1 {_col1}
-                handleSkewJoin: false
-                keys:
-                  0 [Column[_col0]]
-                  1 [Column[_col0]]
-                outputColumnNames: _col0, _col1, _col3
-                Position of Big Table: 1
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: string
-                        expr: _col1
-                        type: bigint
-                        expr: _col3
-                        type: bigint
-                  outputColumnNames: _col0, _col1, _col3
-                  Select Operator
-                    expressions:
-                          expr: _col0
-                          type: string
-                          expr: _col1
-                          type: bigint
-                          expr: _col3
-                          type: bigint
-                    outputColumnNames: _col0, _col1, _col2
-                    Select Operator
-                      expressions:
-                            expr: _col0
-                            type: string
-                            expr: UDFToInteger(_col1)
-                            type: int
-                            expr: UDFToInteger(_col2)
-                            type: int
-                      outputColumnNames: _col0, _col1, _col2
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 1
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: dest_j1
 
   Stage: Stage-6
     Conditional Operator
@@ -199,7 +168,7 @@
     Move Operator
       files:
          hdfs directory: true
-          destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-39-23_363_5550537730462856534/-ext-10000
+          destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-32-07_814_3143780899472392520/-ext-10000
 
   Stage: Stage-0
     Move Operator
@@ -217,7 +186,7 @@
   Stage: Stage-4
     Map Reduce
       Alias -> Map Operator Tree:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-39-23_363_5550537730462856534/-ext-10003 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-32-07_814_3143780899472392520/-ext-10003 
           File Output Operator
             compressed: false
             GlobalTableId: 0
@@ -306,11 +275,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-36_282_980898119383375655/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-32-22_334_4402386459290348391/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-36_282_980898119383375655/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-32-22_334_4402386459290348391/-mr-10000
 POSTHOOK: Lineage: dest_j1.cnt1 EXPRESSION [(src1)x.null, ]
 POSTHOOK: Lineage: dest_j1.cnt2 EXPRESSION [(src)y.null, ]
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
Index: ql/src/test/results/clientpositive/join30.q.out
===================================================================
--- ql/src/test/results/clientpositive/join30.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join30.q.out	(working copy)
@@ -15,12 +15,36 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (TOK_FUNCTION count 1))) (TOK_GROUPBY (. (TOK_TABLE_OR_COL x) key))))
 
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-5 is a root stage
+  Stage-1 depends on stages: Stage-5
   Stage-2 depends on stages: Stage-1
   Stage-0 depends on stages: Stage-2
   Stage-3 depends on stages: Stage-0
 
 STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        x 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        x 
+          TableScan
+            alias: x
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key}
+                1 
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0
+              Position of Big Table: 1
+
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
@@ -47,37 +71,11 @@
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            x 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            x 
-              TableScan
-                alias: x
-                Common Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {key}
-                    1 
-                  handleSkewJoin: false
-                  keys:
-                    0 [Column[key]]
-                    1 [Column[key]]
-                  outputColumnNames: _col0
-                  Position of Big Table: 1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_16-39-49_545_7124720180109288751/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-15_14-32-34_888_4407278154932585093/-mr-10002 
          Select Operator
            expressions:
                  expr: _col0
@@ -173,11 +171,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-59_225_1677785212354840326/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-32-46_431_2542622721154091589/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-39-59_225_1677785212354840326/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-32-46_431_2542622721154091589/-mr-10000
 POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ]
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 66	1
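The join30 plan above corresponds to the following query, reconstructed from the printed AST (MAPJOIN hint on x; the exact source formatting of join30.q is not shown in this hunk, so the layout below is approximate):

    FROM src1 x JOIN src y ON (x.key = y.key)
    INSERT OVERWRITE TABLE dest_j1
    SELECT /*+ MAPJOIN(x) */ x.key, count(1)
    GROUP BY x.key;

With the patch, Stage-5 scans the hinted small table x locally and builds the join hash table; only then does Stage-1 run the Map Reduce join against y.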
Index: ql/src/test/results/clientpositive/join31.q.out
===================================================================
--- ql/src/test/results/clientpositive/join31.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join31.q.out	(working copy)
@@ -22,7 +22,8 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-5
+  Stage-7 depends on stages: Stage-1, Stage-5
+  Stage-2 depends on stages: Stage-7
   Stage-3 depends on stages: Stage-2
   Stage-0 depends on stages: Stage-3
   Stage-4 depends on stages: Stage-0
@@ -83,10 +84,31 @@
               input format: org.apache.hadoop.mapred.SequenceFileInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        file:/tmp/liyintang/hive_2010-10-15_14-32-50_666_6877978110782114347/-mr-10004 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        file:/tmp/liyintang/hive_2010-10-15_14-32-50_666_6877978110782114347/-mr-10004 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 {_col0}
+              1 
+            handleSkewJoin: false
+            keys:
+              0 [Column[_col0]]
+              1 [Column[_col0]]
+            outputColumnNames: _col0
+            Position of Big Table: 1
+
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-03_429_6963096500471959468/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-15_14-32-50_666_6877978110782114347/-mr-10002 
          Common Join Operator
            condition map:
                 Inner Join 0 to 1
@@ -107,35 +129,11 @@
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            file:/tmp/nzhang/hive_2010-09-14_16-40-03_429_6963096500471959468/-mr-10004 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            file:/tmp/nzhang/hive_2010-09-14_16-40-03_429_6963096500471959468/-mr-10004 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {_col0}
-                  1 
-                handleSkewJoin: false
-                keys:
-                  0 [Column[_col0]]
-                  1 [Column[_col0]]
-                outputColumnNames: _col0
-                Position of Big Table: 1
-                File Output Operator
-                  compressed: false
-                  GlobalTableId: 0
-                  table:
-                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 
   Stage: Stage-3
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-03_429_6963096500471959468/-mr-10003 
+        file:/tmp/liyintang/hive_2010-10-15_14-32-50_666_6877978110782114347/-mr-10003 
          Select Operator
            expressions:
                  expr: _col0
@@ -291,11 +289,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-40-20_215_8713512212643872558/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-08_831_2955053647120281072/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-40-20_215_8713512212643872558/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-08_831_2955053647120281072/-mr-10000
 POSTHOOK: Lineage: dest_j1.cnt EXPRESSION [(src1)x.null, (src)y.null, ]
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 128	1
Index: ql/src/test/results/clientpositive/join32.q.out
===================================================================
--- ql/src/test/results/clientpositive/join32.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join32.q.out	(working copy)
@@ -19,8 +19,10 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)))))
 
 STAGE DEPENDENCIES:
-  Stage-6 is a root stage
-  Stage-1 depends on stages: Stage-6
+  Stage-10 is a root stage
+  Stage-6 depends on stages: Stage-10
+  Stage-9 depends on stages: Stage-6
+  Stage-1 depends on stages: Stage-9
   Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3
@@ -28,6 +30,30 @@
   Stage-3
 
 STAGE PLANS:
+  Stage: Stage-10
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        x 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        x 
+          TableScan
+            alias: x
+            GatherStats: false
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col5
+              Position of Big Table: 1
+
   Stage: Stage-6
     Map Reduce
       Alias -> Map Operator Tree:
@@ -50,7 +76,7 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/nzhang/hive_2010-09-14_16-40-24_670_614734799076435404/-mr-10003
+              directory: file:/tmp/liyintang/hive_2010-10-15_14-33-13_100_2523244936110597735/-mr-10003
               NumFilesPerFileSink: 1
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -64,47 +90,11 @@
               MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            x 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            x 
-              TableScan
-                alias: x
-                GatherStats: false
-                Common Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {key} {value}
-                    1 {value}
-                  handleSkewJoin: false
-                  keys:
-                    0 [Column[key]]
-                    1 [Column[key]]
-                  outputColumnNames: _col0, _col1, _col5
-                  Position of Big Table: 1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    directory: file:/tmp/nzhang/hive_2010-09-14_16-40-24_670_614734799076435404/-mr-10003
-                    NumFilesPerFileSink: 1
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col5
-                          columns.types string,string,string
-                          escape.delim \
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [y]
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src [y]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src 
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src 
           Partition
             base file name: src
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -115,12 +105,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
               name src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -131,20 +121,59 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
               name src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: src
         name: src
 
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        z 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        z 
+          TableScan
+            alias: z
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: ((ds = '2008-04-08') and (hr = 11))
+                  type: boolean
+              Filter Operator
+                isSamplingPred: false
+                predicate:
+                    expr: (ds = '2008-04-08')
+                    type: boolean
+                Filter Operator
+                  isSamplingPred: false
+                  predicate:
+                      expr: (hr = 11)
+                      type: boolean
+                  Common Join Operator
+                    condition map:
+                         Inner Join 0 to 1
+                    condition expressions:
+                      0 {_col5} {_col0}
+                      1 {value}
+                    handleSkewJoin: false
+                    keys:
+                      0 [Column[_col1]]
+                      1 [Column[value]]
+                    outputColumnNames: _col1, _col4, _col9
+                    Position of Big Table: 0
+
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-24_670_614734799076435404/-mr-10003 
+        file:/tmp/liyintang/hive_2010-10-15_14-33-13_100_2523244936110597735/-mr-10003 
          Select Operator
            expressions:
                  expr: _col0
@@ -187,9 +216,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 1
-              directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002
+              directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10002
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10000/
+              Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10000/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -199,12 +228,12 @@
                     columns.types string:string:string
                     file.inputformat org.apache.hadoop.mapred.TextInputFormat
                     file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                    location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                    name dest_j1
                    serialization.ddl struct dest_j1 { string key, string value, string val2}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    transient_lastDdlTime 1284507624
+                    transient_lastDdlTime 1287178393
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: dest_j1
              TotalFiles: 1
@@ -212,91 +241,11 @@
              MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            z 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            z 
-              TableScan
-                alias: z
-                GatherStats: false
-                Filter Operator
-                  isSamplingPred: false
-                  predicate:
-                      expr: ((ds = '2008-04-08') and (hr = 11))
-                      type: boolean
-                  Filter Operator
-                    isSamplingPred: false
-                    predicate:
-                        expr: (ds = '2008-04-08')
-                        type: boolean
-                    Filter Operator
-                      isSamplingPred: false
-                      predicate:
-                          expr: (hr = 11)
-                          type: boolean
-                      Common Join Operator
-                        condition map:
-                             Inner Join 0 to 1
-                        condition expressions:
-                          0 {_col5} {_col0}
-                          1 {value}
-                        handleSkewJoin: false
-                        keys:
-                          0 [Column[_col1]]
-                          1 [Column[value]]
-                        outputColumnNames: _col1, _col4, _col9
-                        Position of Big Table: 0
-                        Select Operator
-                          expressions:
-                                expr: _col1
-                                type: string
-                                expr: _col4
-                                type: string
-                                expr: _col9
-                                type: string
-                          outputColumnNames: _col1, _col4, _col9
-                          Select Operator
-                            expressions:
-                                  expr: _col4
-                                  type: string
-                                  expr: _col9
-                                  type: string
-                                  expr: _col1
-                                  type: string
-                            outputColumnNames: _col0, _col1, _col2
-                            File Output Operator
-                              compressed: false
-                              GlobalTableId: 1
-                              directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002
-                              NumFilesPerFileSink: 1
-                              Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10000/
-                              table:
-                                  input format: org.apache.hadoop.mapred.TextInputFormat
-                                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                  properties:
-                                    bucket_count -1
-                                    columns key,value,val2
-                                    columns.types string:string:string
-                                    file.inputformat org.apache.hadoop.mapred.TextInputFormat
-                                    file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                    location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
-                                    name dest_j1
-                                    serialization.ddl struct dest_j1 { string key, string value, string val2}
-                                    serialization.format 1
-                                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                                    transient_lastDdlTime 1284507624
-                                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                                  name: dest_j1
-                              TotalFiles: 1
-                              GatherStats: true
-                              MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-24_670_614734799076435404/-mr-10003 [file:/tmp/nzhang/hive_2010-09-14_16-40-24_670_614734799076435404/-mr-10003]
+        file:/tmp/liyintang/hive_2010-10-15_14-33-13_100_2523244936110597735/-mr-10003 [file:/tmp/liyintang/hive_2010-10-15_14-33-13_100_2523244936110597735/-mr-10003]
       Path -> Partition:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-24_670_614734799076435404/-mr-10003 
+        file:/tmp/liyintang/hive_2010-10-15_14-33-13_100_2523244936110597735/-mr-10003 
           Partition
             base file name: -mr-10003
            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -320,14 +269,14 @@
     Move Operator
       files:
           hdfs directory: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002
-          destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10000
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10002
+          destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10000
 
   Stage: Stage-0
     Move Operator
       tables:
           replace: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10000
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10000
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -337,28 +286,28 @@
                 columns.types string:string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                 name dest_j1
                 serialization.ddl struct dest_j1 { string key, string value, string val2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1284507624
+                transient_lastDdlTime 1287178393
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: dest_j1
-          tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10001
+          tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10001
 
   Stage: Stage-2
     Stats-Aggr Operator
-      Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10000/
+      Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10000/
 
   Stage: Stage-3
     Map Reduce
       Alias -> Map Operator Tree:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10002 
           File Output Operator
             compressed: false
             GlobalTableId: 0
-            directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10000
+            directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10000
             NumFilesPerFileSink: 1
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -369,12 +318,12 @@
                   columns.types string:string:string
                   file.inputformat org.apache.hadoop.mapred.TextInputFormat
                   file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                  location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                   name dest_j1
                   serialization.ddl struct dest_j1 { string key, string value, string val2}
                   serialization.format 1
                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  transient_lastDdlTime 1284507624
+                  transient_lastDdlTime 1287178393
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                name: dest_j1
            TotalFiles: 1
@@ -382,9 +331,9 @@
            MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002]
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10002]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-24_670_614734799076435404/-ext-10002 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-13_100_2523244936110597735/-ext-10002 
           Partition
             base file name: -ext-10002
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -395,12 +344,12 @@
               columns.types string:string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
               name dest_j1
               serialization.ddl struct dest_j1 { string key, string value, string val2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284507624
+              transient_lastDdlTime 1287178393
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -411,12 +360,12 @@
               columns.types string:string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
               name dest_j1
               serialization.ddl struct dest_j1 { string key, string value, string val2}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284507624
+              transient_lastDdlTime 1287178393
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: dest_j1
         name: dest_j1
@@ -446,11 +395,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-40-34_819_6730779352355044928/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-24_814_6081031942704322290/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-40-34_819_6730779352355044928/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-24_814_6081031942704322290/-mr-10000
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
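The join32 plan above corresponds to the following query, reconstructed from the printed AST (MAPJOIN hint on both x and z; formatting approximate):

    FROM src1 x
    JOIN src y ON (x.key = y.key)
    JOIN srcpart z ON (x.value = z.value AND z.ds = '2008-04-08' AND z.hr = 11)
    INSERT OVERWRITE TABLE dest_j1
    SELECT /*+ MAPJOIN(x, z) */ x.key, z.value, y.value;

Because both join legs are hinted, the plan now carries two local-work stages: Stage-10 builds the hash table for x ahead of Stage-6, and Stage-9 builds the one for z (applying the ds/hr partition filters first) ahead of Stage-1.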
Index: ql/src/test/results/clientpositive/join33.q.out
===================================================================
--- ql/src/test/results/clientpositive/join33.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join33.q.out	(working copy)
@@ -19,12 +19,37 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL x) value) (. (TOK_TABLE_OR_COL z) value)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value)))))
 
 STAGE DEPENDENCIES:
-  Stage-3 is a root stage
+  Stage-6 is a root stage
+  Stage-3 depends on stages: Stage-6
   Stage-1 depends on stages: Stage-3
   Stage-0 depends on stages: Stage-1
   Stage-2 depends on stages: Stage-0
 
 STAGE PLANS:
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        x 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        x 
+          TableScan
+            alias: x
+            GatherStats: false
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {key} {value}
+                1 {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[key]]
+                1 [Column[key]]
+              outputColumnNames: _col0, _col1, _col5
+              Position of Big Table: 1
+
   Stage: Stage-3
     Map Reduce
      Alias -> Map Operator Tree:
@@ -47,7 +72,7 @@
             File Output Operator
               compressed: false
               GlobalTableId: 0
-              directory: file:/tmp/nzhang/hive_2010-09-14_16-40-39_182_1358797628502389193/-mr-10002
+              directory: file:/tmp/liyintang/hive_2010-10-15_14-33-29_034_439499547329430891/-mr-10002
               NumFilesPerFileSink: 1
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -61,47 +86,11 @@
               MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            x 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            x 
-              TableScan
-                alias: x
-                GatherStats: false
-                Common Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {key} {value}
-                    1 {value}
-                  handleSkewJoin: false
-                  keys:
-                    0 [Column[key]]
-                    1 [Column[key]]
-                  outputColumnNames: _col0, _col1, _col5
-                  Position of Big Table: 1
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    directory: file:/tmp/nzhang/hive_2010-09-14_16-40-39_182_1358797628502389193/-mr-10002
-                    NumFilesPerFileSink: 1
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                        properties:
-                          columns _col0,_col1,_col5
-                          columns.types string,string,string
-                          escape.delim \
-                    TotalFiles: 1
-                    GatherStats: false
-                    MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [y]
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src [y]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src 
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src 
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -112,12 +101,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
               name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -128,12 +117,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
               name src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: src
         name: src
@@ -141,7 +130,7 @@
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-39_182_1358797628502389193/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-15_14-33-29_034_439499547329430891/-mr-10002 
          Select Operator
            expressions:
                  expr: _col0
@@ -198,10 +187,10 @@
                   type: string
       Needs Tagging: true
       Path -> Alias:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-39_182_1358797628502389193/-mr-10002 [file:/tmp/nzhang/hive_2010-09-14_16-40-39_182_1358797628502389193/-mr-10002]
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z]
+        file:/tmp/liyintang/hive_2010-10-15_14-33-29_034_439499547329430891/-mr-10002 [file:/tmp/liyintang/hive_2010-10-15_14-33-29_034_439499547329430891/-mr-10002]
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z]
       Path -> Partition:
-        file:/tmp/nzhang/hive_2010-09-14_16-40-39_182_1358797628502389193/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-15_14-33-29_034_439499547329430891/-mr-10002 
           Partition
             base file name: -mr-10002
             input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -217,7 +206,7 @@
                 columns _col0,_col1,_col5
                 columns.types string,string,string
                 escape.delim \
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 
           Partition
             base file name: hr=11
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -231,13 +220,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart
              name srcpart
              partition_columns ds/hr
              serialization.ddl struct srcpart { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504421
+              transient_lastDdlTime 1287175206
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -248,13 +237,13 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart
               name srcpart
               partition_columns ds/hr
               serialization.ddl struct srcpart { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504421
+              transient_lastDdlTime 1287175206
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: srcpart
         name: srcpart
@@ -279,9 +268,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 1
-              directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-39_182_1358797628502389193/-ext-10000
+              directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-29_034_439499547329430891/-ext-10000
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-39_182_1358797628502389193/-ext-10000/
+              Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-29_034_439499547329430891/-ext-10000/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -291,12 +280,12 @@
                     columns.types string:string:string
                     file.inputformat org.apache.hadoop.mapred.TextInputFormat
                     file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                    location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                     name dest_j1
                     serialization.ddl struct dest_j1 { string key, string value, string val2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    transient_lastDdlTime 1284507639
+                    transient_lastDdlTime 1287178409
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: dest_j1
               TotalFiles: 1
@@ -307,7 +296,7 @@
     Move Operator
       tables:
           replace: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-39_182_1358797628502389193/-ext-10000
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-29_034_439499547329430891/-ext-10000
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -317,19 +306,19 @@
                 columns.types string:string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                 name dest_j1
                 serialization.ddl struct dest_j1 { string key, string value, string val2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1284507639
+                transient_lastDdlTime 1287178409
               serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
               name: dest_j1
-          tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-39_182_1358797628502389193/-ext-10001
+          tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-29_034_439499547329430891/-ext-10001
 
   Stage: Stage-2
     Stats-Aggr Operator
-      Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-39_182_1358797628502389193/-ext-10000/
+      Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-29_034_439499547329430891/-ext-10000/
 
 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
@@ -356,11 +345,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-40-49_500_7236030535168569116/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-40_799_4171231384741458714/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-40-49_500_7236030535168569116/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-40_799_4171231384741458714/-mr-10000
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)y.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
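join33 is the same three-way join as join32 but with only x hinted, per the AST above; an approximate reconstruction:

    FROM src1 x
    JOIN src y ON (x.key = y.key)
    JOIN srcpart z ON (x.value = z.value AND z.ds = '2008-04-08' AND z.hr = 11)
    INSERT OVERWRITE TABLE dest_j1
    SELECT /*+ MAPJOIN(x) */ x.key, z.value, y.value;

Accordingly, only the x-side map join gets a local-work stage (Stage-6); the join against z remains a reduce-side common join in Stage-1.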
Index: ql/src/test/results/clientpositive/join34.q.out
===================================================================
--- ql/src/test/results/clientpositive/join34.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join34.q.out	(working copy)
@@ -27,7 +27,8 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF src x)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value)) (TOK_WHERE (< (. (TOK_TABLE_OR_COL x) key) 20)))) (TOK_QUERY (TOK_FROM (TOK_TABREF src x1)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) key) key) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x1) value) value)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL x1) key) 100))))) subq1) (TOK_TABREF src1 x) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL subq1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq1) value)))))
 
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-8 is a root stage
+  Stage-1 depends on stages: Stage-8
   Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3
@@ -35,6 +36,30 @@
   Stage-3
 
 STAGE PLANS:
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        x 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        x 
+          TableScan
+            alias: x
+            GatherStats: false
+            Common Join Operator
+              condition map:
+                   Inner Join 0 to 1
+              condition expressions:
+                0 {_col1}
+                1 {key} {value}
+              handleSkewJoin: false
+              keys:
+                0 [Column[_col0]]
+                1 [Column[key]]
+              outputColumnNames: _col1, _col2, _col3
+              Position of Big Table: 0
+
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
@@ -93,9 +118,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 1
-              directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002
+              directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000/
+              Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10000/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -105,12 +130,12 @@
                     columns.types string:string:string
                     file.inputformat org.apache.hadoop.mapred.TextInputFormat
                     file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                    location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                     name dest_j1
                     serialization.ddl struct dest_j1 { string key, string value, string val2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    transient_lastDdlTime 1284507653
+                    transient_lastDdlTime 1287178425
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: dest_j1
               TotalFiles: 1
@@ -171,9 +196,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 1
-              directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002
+              directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000/
+              Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10000/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -183,12 +208,12 @@
                     columns.types string:string:string
                     file.inputformat org.apache.hadoop.mapred.TextInputFormat
                     file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                    location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                    name dest_j1
                    serialization.ddl struct dest_j1 { string key, string value, string val2}
                    serialization.format 1
                    serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    transient_lastDdlTime 1284507653
+                    transient_lastDdlTime 1287178425
                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                  name: dest_j1
              TotalFiles: 1
@@ -196,76 +221,11 @@
              MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            x 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            x 
-              TableScan
-                alias: x
-                GatherStats: false
-                Common Join Operator
-                  condition map:
-                       Inner Join 0 to 1
-                  condition expressions:
-                    0 {_col1}
-                    1 {key} {value}
-                  handleSkewJoin: false
-                  keys:
-                    0 [Column[_col0]]
-                    1 [Column[key]]
-                  outputColumnNames: _col1, _col2, _col3
-                  Position of Big Table: 0
-                  Select Operator
-                    expressions:
-                          expr: _col1
-                          type: string
-                          expr: _col2
-                          type: string
-                          expr: _col3
-                          type: string
-                    outputColumnNames: _col1, _col2, _col3
-                    Select Operator
-                      expressions:
-                            expr: _col2
-                            type: string
-                            expr: _col3
-                            type: string
-                            expr: _col1
-                            type: string
-                      outputColumnNames: _col0, _col1, _col2
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 1
-                        directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002
-                        NumFilesPerFileSink: 1
-                        Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000/
-                        table:
-                            input format: org.apache.hadoop.mapred.TextInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                            properties:
-                              bucket_count -1
-                              columns key,value,val2
-                              columns.types string:string:string
-                              file.inputformat org.apache.hadoop.mapred.TextInputFormat
-                              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
-                              name dest_j1
-                              serialization.ddl struct dest_j1 { string key, string value, string val2}
-                              serialization.format 1
-                              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                              transient_lastDdlTime 1284507653
-                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                            name: dest_j1
-                        TotalFiles: 1
-                        GatherStats: true
-                        MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1]
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src [null-subquery1:subq1-subquery1:x, null-subquery2:subq1-subquery2:x1]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src 
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src 
           Partition
             base file name: src
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -276,12 +236,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -292,12 +252,12 @@
               columns.types string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
               name src
               serialization.ddl struct src { string key, string value}
               serialization.format 1
               serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
           name: src
         name: src
@@ -309,14 +269,14 @@
     Move Operator
       files:
           hdfs directory: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002
-          destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002
+          destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10000
 
   Stage: Stage-0
     Move Operator
       tables:
           replace: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10000
           table:
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -326,28 +286,28 @@
                 columns.types string:string:string
                 file.inputformat org.apache.hadoop.mapred.TextInputFormat
                 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                 name dest_j1
                 serialization.ddl struct dest_j1 { string key, string value, string val2}
                 serialization.format 1
                 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1284507653
+                transient_lastDdlTime 1287178425
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: dest_j1
-          tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10001
+          tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10001
 
   Stage: Stage-2
     Stats-Aggr Operator
-      Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000/
+      Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10000/
 
   Stage: Stage-3
     Map Reduce
       Alias -> Map Operator Tree:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002 
           File Output Operator
            compressed: false
            GlobalTableId: 0
-            directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10000
+            directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10000
             NumFilesPerFileSink: 1
             table:
                 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -358,12 +318,12 @@
                   columns.types string:string:string
                   file.inputformat org.apache.hadoop.mapred.TextInputFormat
                   file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+                  location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
                   name dest_j1
                   serialization.ddl struct dest_j1 { string key, string value, string val2}
                   serialization.format 1
                   serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  transient_lastDdlTime 1284507653
+                  transient_lastDdlTime 1287178425
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                name: dest_j1
            TotalFiles: 1
@@ -371,9 +331,9 @@
            MultiFileSpray: false
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002]
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-40-53_747_9048108600140235824/-ext-10002 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-45_048_2682927428088725309/-ext-10002 
           Partition
             base file name: -ext-10002
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -384,12 +344,12 @@
               columns.types string:string:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
              name dest_j1
              serialization.ddl struct dest_j1 { string key, string value, string val2}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284507653
+              transient_lastDdlTime 1287178425
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -400,12 +360,12 @@
              columns.types string:string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
              name dest_j1
              serialization.ddl struct dest_j1 { string key, string value, string val2}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284507653
+              transient_lastDdlTime 1287178425
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          name: dest_j1
        name: dest_j1
@@ -441,11 +401,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-41-01_352_8712511675304640347/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-52_692_2534487245950966765/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-41-01_352_8712511675304640347/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-33-52_692_2534487245950966765/-mr-10000
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.FieldSchema(name:value, type:string, comment:default), (src)x1.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ]
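join34 feeds the map join from a UNION ALL subquery; reconstructed from the AST above (approximate formatting):

    FROM (
      SELECT x.key, x.value FROM src x WHERE x.key < 20
      UNION ALL
      SELECT x1.key, x1.value FROM src x1 WHERE x1.key > 100
    ) subq1
    JOIN src1 x ON (x.key = subq1.key)
    INSERT OVERWRITE TABLE dest_j1
    SELECT /*+ MAPJOIN(x) */ x.key, x.value, subq1.value;

Here x is the hinted small side, so Stage-8 builds its hash table locally while the union output remains the big-table side (Position of Big Table: 0).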
Index: ql/src/test/results/clientpositive/join35.q.out
===================================================================
--- ql/src/test/results/clientpositive/join35.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/join35.q.out	(working copy)
@@ -28,7 +28,8 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1, Stage-7
+  Stage-9 depends on stages: Stage-1, Stage-7
+  Stage-2 depends on stages: Stage-9
   Stage-6 depends on stages: Stage-2 , consists of Stage-5, Stage-4
   Stage-5
   Stage-0 depends on stages: Stage-5, Stage-4
@@ -82,9 +83,9 @@
                   type: bigint
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [null-subquery1:subq1-subquery1:x]
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src [null-subquery1:subq1-subquery1:x]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src 
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src 
          Partition
            base file name: src
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -95,12 +96,12 @@
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -111,12 +112,12 @@
              columns.types string:string
              file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src
              name src
              serialization.ddl struct src { string key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284504429
+              transient_lastDdlTime 1287175215
            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
          name: src
        name: src
@@ -140,7 +141,7 @@
            File Output Operator
              compressed: false
              GlobalTableId: 0
-              directory: file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10002
file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -153,10 +154,34 @@ GatherStats: false MultiFileSpray: false + Stage: Stage-9 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col1} + 1 {key} {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col1, _col2, _col3 + Position of Big Table: 0 + Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10002 + file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10002 Union Common Join Operator condition map: @@ -200,9 +225,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -212,18 +237,18 @@ columns.types string:string:int file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, i32 val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 + transient_lastDdlTime 1287178436 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 GatherStats: true MultiFileSpray: false - file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10004 + file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10004 Union Common Join Operator condition map: @@ -267,9 +292,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -279,12 +304,12 @@ columns.types string:string:int 
file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, i32 val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 + transient_lastDdlTime 1287178436 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 @@ -292,86 +317,12 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col1} - 1 {key} {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col1, _col2, _col3 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col1 - type: bigint - expr: _col2 - type: string - expr: _col3 - type: string - outputColumnNames: _col1, _col2, _col3 - Select Operator - expressions: - expr: _col2 - type: string - expr: _col3 - type: string - expr: _col1 - type: bigint - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: UDFToInteger(_col2) - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:int - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 - name dest_j1 - serialization.ddl struct dest_j1 { string key, string value, i32 val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false Needs Tagging: false Path -> Alias: - file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10002 [file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10002] - file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10004 [file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10004] + file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10002 [file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10002] + file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10004 [file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10004] Path -> Partition: - 
file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10002 + file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10002 Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -387,7 +338,7 @@ columns _col0,_col1 columns.types string,bigint escape.delim \ - file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10004 + file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10004 Partition base file name: -mr-10004 input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -411,14 +362,14 @@ Move Operator files: hdfs directory: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -428,28 +379,28 @@ columns.types string:string:int file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, i32 val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 + transient_lastDdlTime 1287178436 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10001 Stage: Stage-3 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10000/ Stage: Stage-4 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10000 + directory: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -460,12 +411,12 @@ columns.types string:string:int file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, i32 val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 + transient_lastDdlTime 1287178436 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 @@ -473,9 +424,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-06_235_1465217120659994288/-ext-10003 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-33-56_901_5834690759021421387/-ext-10003 Partition base file name: -ext-10003 input format: org.apache.hadoop.mapred.TextInputFormat @@ -486,12 +437,12 @@ columns.types string:string:int file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, i32 val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 + transient_lastDdlTime 1287178436 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -502,12 +453,12 @@ columns.types string:string:int file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, i32 val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507666 + transient_lastDdlTime 1287178436 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 name: dest_j1 @@ -557,9 +508,9 @@ type: bigint Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src [null-subquery2:subq1-subquery2:x1] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src [null-subquery2:subq1-subquery2:x1] 
Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src Partition base file name: src input format: org.apache.hadoop.mapred.TextInputFormat @@ -570,12 +521,12 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src name src serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504429 + transient_lastDdlTime 1287175215 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -586,12 +537,12 @@ columns.types string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/src + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/src name src serialization.ddl struct src { string key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284504429 + transient_lastDdlTime 1287175215 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: src name: src @@ -615,7 +566,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/tmp/nzhang/hive_2010-09-14_16-41-06_235_1465217120659994288/-mr-10004 + directory: file:/tmp/liyintang/hive_2010-10-15_14-33-56_901_5834690759021421387/-mr-10004 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -659,11 +610,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-41-23_904_6990538085820162143/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-34-15_245_607185286374488826/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-41-23_904_6990538085820162143/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-34-15_245_607185286374488826/-mr-10000 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 EXPRESSION [(src)x.null, (src)x1.null, ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join36.q.out =================================================================== --- ql/src/test/results/clientpositive/join36.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join36.q.out (working copy) @@ -57,7 +57,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF tmp1 x) (TOK_TABREF tmp2 y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) cnt)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL y) cnt))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -65,6 +66,29 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {cnt} + 1 {cnt} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -111,52 +135,6 @@ name: dest_j1 Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {cnt} - 1 {cnt} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col5 - type: int - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: int - expr: _col5 - type: int - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 Stage: Stage-5 Conditional Operator @@ -165,7 +143,7 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-43_154_2888049314872131139/-ext-10000 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-34-33_684_5611117980279032391/-ext-10000 Stage: Stage-0 Move Operator @@ -183,7 +161,7 @@ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-43_154_2888049314872131139/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-34-33_684_5611117980279032391/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 @@ -218,11 +196,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-41-50_562_502275187594181738/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-34-41_401_2982529179982198007/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-41-50_562_502275187594181738/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-34-41_401_2982529179982198007/-mr-10000 POSTHOOK: Lineage: dest_j1.key SIMPLE [(tmp1)x.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(tmp2)y.FieldSchema(name:cnt, type:int, comment:null), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(tmp1)x.FieldSchema(name:cnt, type:int, comment:null), ] Index: ql/src/test/results/clientpositive/join37.q.out 
=================================================================== --- ql/src/test/results/clientpositive/join37.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join37.q.out (working copy) @@ -17,7 +17,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST X))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -25,6 +26,29 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5 + Position of Big Table: 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -80,61 +104,6 @@ name: dest_j1 Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - Select Operator - expressions: - expr: UDFToInteger(_col0) - type: int - expr: _col1 - type: string - expr: _col2 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 Stage: Stage-5 Conditional Operator @@ -143,7 +112,7 @@ Move Operator files: hdfs directory: true - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-55_090_7852258381958275908/-ext-10000 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-34-45_814_1606970852322632805/-ext-10000 Stage: Stage-0 Move Operator @@ -161,7 +130,7 @@ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-41-55_090_7852258381958275908/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-34-45_814_1606970852322632805/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 @@ -192,11 +161,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: 
file:/tmp/nzhang/hive_2010-09-14_16-42-02_401_8110052503148408708/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-34-53_374_832161609076546752/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-42-02_401_8110052503148408708/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-34-53_374_832161609076546752/-mr-10000 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src1)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.value SIMPLE [(src1)x.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join38.q.out =================================================================== --- ql/src/test/results/clientpositive/join38.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join38.q.out (working copy) @@ -26,11 +26,11 @@ PREHOOK: query: select * from tmp PREHOOK: type: QUERY PREHOOK: Input: default@tmp -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-00-50_066_9191331330135572186/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-35-03_529_8041528917330220501/-mr-10000 POSTHOOK: query: select * from tmp POSTHOOK: type: QUERY POSTHOOK: Input: default@tmp -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-00-50_066_9191331330135572186/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-35-03_529_8041528917330220501/-mr-10000 POSTHOOK: Lineage: tmp.col0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.col1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.col10 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] @@ -73,11 +73,35 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src a) (TOK_TABREF tmp b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) col11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) col5)) (TOK_SELEXPR (TOK_FUNCTION count 1) count)) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) col11) 111)) (TOK_GROUPBY (. (TOK_TABLE_OR_COL a) value) (. 
(TOK_TABLE_OR_COL b) col5)))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-4 is a root stage + Stage-1 depends on stages: Stage-4 Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: + Stage: Stage-4 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {value} + 1 {col5} {col11} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[col11]] + outputColumnNames: _col1, _col9, _col15 + Position of Big Table: 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -108,37 +132,11 @@ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {value} - 1 {col5} {col11} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[col11]] - outputColumnNames: _col1, _col9, _col15 - Position of Big Table: 1 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/jsichi/hive_2010-08-26_16-00-50_333_7198447322152213728/-mr-10002 + file:/tmp/liyintang/hive_2010-10-15_14-35-03_891_2596651658573027267/-mr-10002 Select Operator expressions: expr: _col1 @@ -226,7 +224,7 @@ PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Input: default@tmp -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-00-50_441_1604865558910370709/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-35-04_026_4932987435433548006/-mr-10000 POSTHOOK: query: FROM src a JOIN tmp b ON (a.key = b.col11) SELECT /*+ MAPJOIN(a) */ a.value, b.col5, count(1) as count where b.col11 = 111 @@ -234,7 +232,7 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Input: default@tmp -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-00-50_441_1604865558910370709/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-35-04_026_4932987435433548006/-mr-10000 POSTHOOK: Lineage: tmp.col0 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.col1 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: tmp.col10 EXPRESSION [(src)src.FieldSchema(name:key, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join39.q.out =================================================================== --- ql/src/test/results/clientpositive/join39.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join39.q.out (working copy) @@ -3,6 +3,154 @@ POSTHOOK: query: CREATE TABLE dest_j1(key STRING, value STRING, key1 string, val2 STRING) STORED AS TEXTFILE POSTHOOK: type: CREATETABLE POSTHOOK: Output: default@dest_j1 +PREHOOK: query: explain +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) +PREHOOK: type: QUERY +POSTHOOK: query: explain +INSERT OVERWRITE TABLE dest_j1 +SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value +FROM src x left outer JOIN (select * from src where key <= 100) y ON 
(x.key = y.key) +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL key) 100)))) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST y))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))))) + +STAGE DEPENDENCIES: + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 + Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3 + Stage-2 depends on stages: Stage-0 + Stage-3 + +STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + y:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + y:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key <= 100) + type: boolean + Filter Operator + predicate: + expr: (key <= 100) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Common Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 0 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + x + TableScan + alias: x + Common Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 0 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col4, _col5 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + expr: _col4 + type: string + expr: _col5 + type: string + outputColumnNames: _col0, _col1, _col2, _col3 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 + Local Work: + Map Reduce Local Work + + Stage: Stage-5 + Conditional Operator + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-35-12_770_665160760349140672/-ext-10000 + + Stage: Stage-0 + Move Operator + tables: + replace: true + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 + + Stage: Stage-2 + Stats-Aggr Operator + + Stage: Stage-3 + Map Reduce + Alias -> Map Operator Tree: + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-35-12_770_665160760349140672/-ext-10002 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input 
format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + name: dest_j1 + + PREHOOK: query: INSERT OVERWRITE TABLE dest_j1 SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) @@ -22,11 +170,11 @@ PREHOOK: query: select * from dest_j1 x order by x.key PREHOOK: type: QUERY PREHOOK: Input: default@dest_j1 -PREHOOK: Output: file:/tmp/jssarma/hive_2010-07-21_11-34-38_483_2873236764151910976/10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-35-20_561_5505086574145624502/-mr-10000 POSTHOOK: query: select * from dest_j1 x order by x.key POSTHOOK: type: QUERY POSTHOOK: Input: default@dest_j1 -POSTHOOK: Output: file:/tmp/jssarma/hive_2010-07-21_11-34-38_483_2873236764151910976/10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-35-20_561_5505086574145624502/-mr-10000 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src)x.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.key1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] Index: ql/src/test/results/clientpositive/join40.q.out =================================================================== --- ql/src/test/results/clientpositive/join40.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join40.q.out (working copy) @@ -100,12 +100,12 @@ FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-06_210_967311768247652763/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-02-31_124_6927903071980513683/-mr-10000 POSTHOOK: query: SELECT x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-06_210_967311768247652763/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-02-31_124_6927903071980513683/-mr-10000 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 0 val_0 @@ -751,12 +751,12 @@ FROM src src1 JOIN src src2 ON (src1.key = src2.key) PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-17_142_417700842326185495/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-02-37_946_7958640688746987562/-mr-10000 POSTHOOK: query: select src1.key, src2.value FROM src src1 JOIN src src2 ON (src1.key = src2.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-17_142_417700842326185495/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-02-37_946_7958640688746987562/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -1904,7 +1904,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-31_857_1059875095406707508/-mr-10002 + file:/tmp/liyintang/hive_2010-10-16_20-02-47_615_5874602744426579650/-mr-10002 Reduce Output Operator key 
expressions: expr: _col0 @@ -1952,12 +1952,12 @@ SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-31_970_2552113638239570524/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-02-47_757_232126715240534957/-mr-10000 POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-31_970_2552113638239570524/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-02-47_757_232126715240534957/-mr-10000 NULL NULL NULL NULL 10 val_10 NULL NULL NULL NULL 100 val_100 NULL NULL NULL NULL 100 val_100 @@ -2633,7 +2633,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-48_236_188923787391871879/-mr-10002 + file:/tmp/liyintang/hive_2010-10-16_20-03-00_961_3638553023871821513/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -2681,12 +2681,12 @@ SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-48_363_6899226935845466819/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-03-01_106_5886120209091224363/-mr-10000 POSTHOOK: query: SELECT * FROM src src1 JOIN src src2 ON (src1.key = src2.key AND src1.key < 10 AND src2.key < 15) RIGHT OUTER JOIN src src3 ON (src1.key = src3.key AND src3.key < 20) SORT BY src1.key, src1.value, src2.key, src2.value, src3.key, src3.value POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-00-48_363_6899226935845466819/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-03-01_106_5886120209091224363/-mr-10000 NULL NULL NULL NULL 10 val_10 NULL NULL NULL NULL 100 val_100 NULL NULL NULL NULL 100 val_100 @@ -3247,10 +3247,49 @@ (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF src x) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF src)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (<= (TOK_TABLE_OR_COL key) 100)))) y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST y))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key)) (TOK_SELEXPR (. 
(TOK_TABLE_OR_COL y) value))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + y:src + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + y:src + TableScan + alias: src + Filter Operator + predicate: + expr: (key <= 100) + type: boolean + Filter Operator + predicate: + expr: (key <= 100) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: value + type: string + outputColumnNames: _col0, _col1 + Common Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {key} {value} + 1 {_col0} {_col1} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[_col0]] + outputColumnNames: _col0, _col1, _col4, _col5 + Position of Big Table: 0 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -3299,85 +3338,21 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - y:src - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - y:src - TableScan - alias: src - Filter Operator - predicate: - expr: (key <= 100) - type: boolean - Filter Operator - predicate: - expr: (key <= 100) - type: boolean - Select Operator - expressions: - expr: key - type: string - expr: value - type: string - outputColumnNames: _col0, _col1 - Common Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {key} {value} - 1 {_col0} {_col1} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[_col0]] - outputColumnNames: _col0, _col1, _col4, _col5 - Position of Big Table: 0 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col4, _col5 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - expr: _col4 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2, _col3 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator limit: -1 - PREHOOK: query: SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-01-00_497_7810479910052206762/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-03-11_195_45527429920277744/-mr-10000 POSTHOOK: query: SELECT /*+ MAPJOIN(y) */ x.key, x.value, y.key, y.value FROM src x left outer JOIN (select * from src where key <= 100) y ON (x.key = y.key) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-01-00_497_7810479910052206762/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-03-11_195_45527429920277744/-mr-10000 238 val_238 NULL NULL 86 val_86 86 val_86 311 val_311 NULL NULL @@ -4011,7 +3986,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-01-06_156_3096923421519776221/-mr-10002 + 
file:/tmp/liyintang/hive_2010-10-16_20-03-16_243_8188098789715192859/-mr-10002 Reduce Output Operator sort order: tag: -1 @@ -4045,9 +4020,9 @@ PREHOOK: query: SELECT COUNT(1) FROM SRC A JOIN SRC B ON (A.KEY=B.KEY) PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-01-06_237_3766500628681283400/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-03-16_359_4312483358519343610/-mr-10000 POSTHOOK: query: SELECT COUNT(1) FROM SRC A JOIN SRC B ON (A.KEY=B.KEY) POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/var/folders/6g/6grtCwPMEf4sqHUPpy6xQG9ByHg/-Tmp-/heyongqiang/hive_2010-07-29_13-01-06_237_3766500628681283400/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-03-16_359_4312483358519343610/-mr-10000 1028 Index: ql/src/test/results/clientpositive/join_map_ppr.q.out =================================================================== --- ql/src/test/results/clientpositive/join_map_ppr.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/join_map_ppr.q.out (working copy) @@ -21,7 +21,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF srcpart z) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x y))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL z) hr) 11))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-8 is a root stage + Stage-1 depends on stages: Stage-8 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -29,6 +30,55 @@ Stage-3 STAGE PLANS: + Stage: Stage-8 + Map Reduce Local Work + Alias -> Map Local Tables: + x + Fetch Operator + limit: -1 + y + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + x + TableScan + alias: x + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} {ds} {hr} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col5, _col9, _col10, _col11 + Position of Big Table: 2 + y + TableScan + alias: y + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + Inner Join 0 to 2 + condition expressions: + 0 {key} + 1 {value} + 2 {value} {ds} {hr} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + 2 [Column[key]] + outputColumnNames: _col0, _col5, _col9, _col10, _col11 + Position of Big Table: 2 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -86,9 +136,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000/ + Stats Publishing Key Prefix: 
pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -98,12 +148,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1 name dest_j1 serialization.ddl struct dest_j1 { string key, string value, string val2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507870 + transient_lastDdlTime 1287179185 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dest_j1 TotalFiles: 1 @@ -111,163 +161,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - x - Fetch Operator - limit: -1 - y - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - x - TableScan - alias: x - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} {ds} {hr} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - outputColumnNames: _col0, _col5, _col9, _col10, _col11 - Position of Big Table: 2 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col5 - type: string - expr: _col9 - type: string - expr: _col10 - type: string - expr: _col11 - type: string - outputColumnNames: _col0, _col5, _col9, _col10, _col11 - Filter Operator - isSamplingPred: false - predicate: - expr: ((_col10 = '2008-04-08') and (_col11 = 11)) - type: boolean - Select Operator - expressions: - expr: _col0 - type: string - expr: _col9 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value,val2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1 - name dest_j1 - serialization.ddl struct dest_j1 { string key, string value, string val2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284507870 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: dest_j1 - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - y - TableScan - alias: y - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - Inner Join 0 to 2 - condition expressions: - 0 {key} - 1 {value} - 2 {value} {ds} {hr} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - 2 [Column[key]] - 
outputColumnNames: _col0, _col5, _col9, _col10, _col11
- Position of Big Table: 2
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- expr: _col9
- type: string
- expr: _col10
- type: string
- expr: _col11
- type: string
- outputColumnNames: _col0, _col5, _col9, _col10, _col11
- Filter Operator
- isSamplingPred: false
- predicate:
- expr: ((_col10 = '2008-04-08') and (_col11 = 11))
- type: boolean
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col9
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
- directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002
- NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000/
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
- name dest_j1
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284507870
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
 Needs Tagging: false
 Path -> Alias:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z]
+ pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z]
 Path -> Partition:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
+ pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
 Partition
 base file name: hr=11
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -281,13 +179,13 @@
 columns.types string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart
 name srcpart
 partition_columns ds/hr
 serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284504421
+ transient_lastDdlTime 1287175206
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -298,13 +196,13 @@
 columns.types string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart
 name srcpart
 partition_columns ds/hr
 serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284504421
+ transient_lastDdlTime 1287175206
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: srcpart
 name: srcpart
@@ -316,14 +214,14 @@
 Move Operator
 files:
 hdfs directory: true
- source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002
- destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000
+ source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10002
+ destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10000
 Stage: Stage-0
 Move Operator
 tables:
 replace: true
- source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000
+ source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10000
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -333,28 +231,28 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 serialization.ddl struct dest_j1 { string key, string value, string val2}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284507870
+ transient_lastDdlTime 1287179185
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
- tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10001
+ tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10001
 Stage: Stage-2
 Stats-Aggr Operator
- Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000/
+ Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10000/
 Stage: Stage-3
 Map Reduce
 Alias -> Map Operator Tree:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002
+ pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10002
 File Output Operator
 compressed: false
 GlobalTableId: 0
- directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10000
+ directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10000
 NumFilesPerFileSink: 1
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -365,12 +263,12 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 serialization.ddl struct dest_j1 { string key, string value, string val2}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284507870
+ transient_lastDdlTime 1287179185
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
 TotalFiles: 1
@@ -378,9 +276,9 @@
 MultiFileSpray: false
 Needs Tagging: false
 Path -> Alias:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002]
+ pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10002]
 Path -> Partition:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-30_437_7008487967784489062/-ext-10002
+ pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-25_565_4458155958654291457/-ext-10002
 Partition
 base file name: -ext-10002
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -391,12 +289,12 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 serialization.ddl struct dest_j1 { string key, string value, string val2}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284507870
+ transient_lastDdlTime 1287179185
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -407,16 +305,17 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 serialization.ddl struct dest_j1 { string key, string value, string val2}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284507870
+ transient_lastDdlTime 1287179185
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
 name: dest_j1
+
 PREHOOK: query: INSERT OVERWRITE TABLE dest_j1
 SELECT /*+ MAPJOIN(x,y) */ x.key, z.value, y.value
 FROM src1 x JOIN src y ON (x.key = y.key)
@@ -443,11 +342,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-44-38_226_829082970114385403/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-46-33_967_3390466593997914415/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-44-38_226_829082970114385403/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-46-33_967_3390466593997914415/-mr-10000
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(srcpart)z.FieldSchema(name:value, type:string, comment:default), ]
@@ -627,7 +526,8 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF src1_copy x) (TOK_TABREF src_copy y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key))) (TOK_TABREF srcpart z) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL z) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dest_j1)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x y))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value))) (TOK_WHERE (and (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08') (= (. (TOK_TABLE_OR_COL z) hr) 11)))))
 STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-8 is a root stage
+ Stage-1 depends on stages: Stage-8
 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3
 Stage-4
 Stage-0 depends on stages: Stage-4, Stage-3
@@ -635,6 +535,55 @@
 Stage-3
 STAGE PLANS:
+ Stage: Stage-8
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ x
+ Fetch Operator
+ limit: -1
+ y
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ x
+ TableScan
+ alias: x
+ GatherStats: false
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {key}
+ 1 {value}
+ 2 {value} {ds} {hr}
+ handleSkewJoin: false
+ keys:
+ 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+ 2 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+ outputColumnNames: _col0, _col5, _col9, _col10, _col11
+ Position of Big Table: 2
+ y
+ TableScan
+ alias: y
+ GatherStats: false
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ Inner Join 0 to 2
+ condition expressions:
+ 0 {key}
+ 1 {value}
+ 2 {value} {ds} {hr}
+ handleSkewJoin: false
+ keys:
+ 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+ 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+ 2 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+ outputColumnNames: _col0, _col5, _col9, _col10, _col11
+ Position of Big Table: 2
+
 Stage: Stage-1
 Map Reduce
 Alias -> Map Operator Tree:
@@ -692,9 +641,9 @@
 File Output Operator
 compressed: false
 GlobalTableId: 1
- directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002
+ directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10002
 NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000/
+ Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10000/
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -704,7 +653,7 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 numFiles 1
 numPartitions 0
@@ -713,7 +662,7 @@
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 2125
- transient_lastDdlTime 1284507878
+ transient_lastDdlTime 1287179193
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
 TotalFiles: 1
@@ -721,171 +670,11 @@
 MultiFileSpray: false
 Local Work:
 Map Reduce Local Work
- Alias -> Map Local Tables:
- x
- Fetch Operator
- limit: -1
- y
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- x
- TableScan
- alias: x
- GatherStats: false
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- condition expressions:
- 0 {key}
- 1 {value}
- 2 {value} {ds} {hr}
- handleSkewJoin: false
- keys:
- 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
- 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
- 2 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
- outputColumnNames: _col0, _col5, _col9, _col10, _col11
- Position of Big Table: 2
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- expr: _col9
- type: string
- expr: _col10
- type: string
- expr: _col11
- type: string
- outputColumnNames: _col0, _col5, _col9, _col10, _col11
- Filter Operator
- isSamplingPred: false
- predicate:
- expr: ((_col10 = '2008-04-08') and (_col11 = 11))
- type: boolean
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col9
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
- directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002
- NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000/
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
- name dest_j1
- numFiles 1
- numPartitions 0
- numRows 107
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2125
- transient_lastDdlTime 1284507878
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
- y
- TableScan
- alias: y
- GatherStats: false
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- Inner Join 0 to 2
- condition expressions:
- 0 {key}
- 1 {value}
- 2 {value} {ds} {hr}
- handleSkewJoin: false
- keys:
- 0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
- 1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
- 2 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
- outputColumnNames: _col0, _col5, _col9, _col10, _col11
- Position of Big Table: 2
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- expr: _col9
- type: string
- expr: _col10
- type: string
- expr: _col11
- type: string
- outputColumnNames: _col0, _col5, _col9, _col10, _col11
- Filter Operator
- isSamplingPred: false
- predicate:
- expr: ((_col10 = '2008-04-08') and (_col11 = 11))
- type: boolean
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col9
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- GlobalTableId: 1
- directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002
- NumFilesPerFileSink: 1
- Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000/
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- properties:
- bucket_count -1
- columns key,value,val2
- columns.types string:string:string
- file.inputformat org.apache.hadoop.mapred.TextInputFormat
- file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
- name dest_j1
- numFiles 1
- numPartitions 0
- numRows 107
- serialization.ddl struct dest_j1 { string key, string value, string val2}
- serialization.format 1
- serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- totalSize 2125
- transient_lastDdlTime 1284507878
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- name: dest_j1
- TotalFiles: 1
- GatherStats: true
- MultiFileSpray: false
 Needs Tagging: false
 Path -> Alias:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z]
+ pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11 [z]
 Path -> Partition:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
+ pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart/ds=2008-04-08/hr=11
 Partition
 base file name: hr=11
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -899,13 +688,13 @@
 columns.types string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart
 name srcpart
 partition_columns ds/hr
 serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284504421
+ transient_lastDdlTime 1287175206
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -916,13 +705,13 @@
 columns.types string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcpart
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcpart
 name srcpart
 partition_columns ds/hr
 serialization.ddl struct srcpart { string key, string value}
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
- transient_lastDdlTime 1284504421
+ transient_lastDdlTime 1287175206
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: srcpart
 name: srcpart
@@ -934,14 +723,14 @@
 Move Operator
 files:
 hdfs directory: true
- source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002
- destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000
+ source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10002
+ destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10000
 Stage: Stage-0
 Move Operator
 tables:
 replace: true
- source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000
+ source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10000
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -951,7 +740,7 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 numFiles 1
 numPartitions 0
@@ -960,23 +749,23 @@
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 2125
- transient_lastDdlTime 1284507878
+ transient_lastDdlTime 1287179193
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
- tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10001
+ tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10001
 Stage: Stage-2
 Stats-Aggr Operator
- Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000/
+ Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10000/
 Stage: Stage-3
 Map Reduce
 Alias -> Map Operator Tree:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002
+ pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10002
 File Output Operator
 compressed: false
 GlobalTableId: 0
- directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10000
+ directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10000
 NumFilesPerFileSink: 1
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -987,7 +776,7 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 numFiles 1
 numPartitions 0
@@ -996,7 +785,7 @@
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 2125
- transient_lastDdlTime 1284507878
+ transient_lastDdlTime 1287179193
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
 TotalFiles: 1
@@ -1004,9 +793,9 @@
 MultiFileSpray: false
 Needs Tagging: false
 Path -> Alias:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002]
+ pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10002]
 Path -> Partition:
- pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-14_16-44-54_999_5430658658503744271/-ext-10002
+ pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_14-46-49_066_8646032217905227141/-ext-10002
 Partition
 base file name: -ext-10002
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1017,7 +806,7 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 numFiles 1
 numPartitions 0
@@ -1026,7 +815,7 @@
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 2125
- transient_lastDdlTime 1284507878
+ transient_lastDdlTime 1287179193
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -1037,7 +826,7 @@
 columns.types string:string:string
 file.inputformat org.apache.hadoop.mapred.TextInputFormat
 file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dest_j1
+ location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dest_j1
 name dest_j1
 numFiles 1
 numPartitions 0
@@ -1046,7 +835,7 @@
 serialization.format 1
 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 totalSize 2125
- transient_lastDdlTime 1284507878
+ transient_lastDdlTime 1287179193
 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 name: dest_j1
 name: dest_j1
@@ -1085,11 +874,11 @@
 PREHOOK: query: select * from dest_j1 x order by x.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-45-02_649_8915711725845197169/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-46-57_402_5384425936145554197/-mr-10000
 POSTHOOK: query: select * from dest_j1 x order by x.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_16-45-02_649_8915711725845197169/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_14-46-57_402_5384425936145554197/-mr-10000
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1)x.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.key SIMPLE [(src1_copy)x.FieldSchema(name:key, type:string, comment:null), ]
 POSTHOOK: Lineage: dest_j1.val2 SIMPLE [(src)y.FieldSchema(name:value, type:string, comment:default), ]
Index: ql/src/test/results/clientpositive/mapjoin1.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin1.q.out (revision 1023076)
+++ ql/src/test/results/clientpositive/mapjoin1.q.out (working copy)
@@ -7,7 +7,7 @@
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-24_21-14-16_226_3903968223903966652/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-17_18-28-45_483_4882005137956924467/-mr-10000
 POSTHOOK: query: SELECT /*+ MAPJOIN(b) */ sum(a.key) as sum_a
 FROM srcpart a
 JOIN src b ON a.key = b.key
 where a.ds is not null
@@ -17,5 +17,5 @@
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-24_21-14-16_226_3903968223903966652/-mr-10000
-76260.0
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-17_18-28-45_483_4882005137956924467/-mr-10000
+1114788.0
Index: ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (revision 1023076)
+++ ql/src/test/results/clientpositive/mapjoin_mapjoin.q.out (working copy)
@@ -6,11 +6,36 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL srcpart) key)))))
 STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-6 is a root stage
+ Stage-1 depends on stages: Stage-6
+ Stage-5 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-5
 Stage-0 is a root stage
 STAGE PLANS:
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src
+ TableScan
+ alias: src
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
+
 Stage: Stage-1
 Map Reduce
 Alias -> Map Operator Tree:
@@ -37,37 +62,34 @@
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 Local Work:
 Map Reduce Local Work
- Alias -> Map Local Tables:
- src
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- src
- TableScan
- alias: src
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[value]]
- 1 [Column[value]]
- outputColumnNames: _col0
- Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-5
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src1
+ TableScan
+ alias: src1
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 0
 Stage: Stage-2
 Map Reduce
 Alias -> Map Operator Tree:
- file:/tmp/amarsri/hive_2010-10-03_23-32-11_193_4494089607373072455/-mr-10002
+ file:/tmp/liyintang/hive_2010-10-17_18-28-58_205_3059857123809567795/-mr-10002
 Select Operator
 expressions:
 expr: _col0
@@ -103,42 +125,6 @@
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Local Work:
 Map Reduce Local Work
- Alias -> Map Local Tables:
- src1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- src1
- TableScan
- alias: src1
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {_col0}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[key]]
- outputColumnNames: _col0
- Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Stage: Stage-0
 Fetch Operator
@@ -153,12 +139,37 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_JOIN (TOK_TABREF srcpart) (TOK_TABREF src) (= (. (TOK_TABLE_OR_COL srcpart) value) (. (TOK_TABLE_OR_COL src) value))) (TOK_TABREF src1) (= (. (TOK_TABLE_OR_COL srcpart) key) (. (TOK_TABLE_OR_COL src1) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST src src1))) (TOK_SELEXPR (TOK_FUNCTIONSTAR count))) (TOK_GROUPBY (TOK_TABLE_OR_COL ds))))
 STAGE DEPENDENCIES:
- Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
+ Stage-7 is a root stage
+ Stage-1 depends on stages: Stage-7
+ Stage-6 depends on stages: Stage-1
+ Stage-2 depends on stages: Stage-6
 Stage-3 depends on stages: Stage-2
 Stage-0 is a root stage
 STAGE PLANS:
+ Stage: Stage-7
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src
+ TableScan
+ alias: src
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key} {ds}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[value]]
+ 1 [Column[value]]
+ outputColumnNames: _col0, _col2
+ Position of Big Table: 0
+
 Stage: Stage-1
 Map Reduce
 Alias -> Map Operator Tree:
@@ -185,37 +196,34 @@
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 Local Work:
 Map Reduce Local Work
- Alias -> Map Local Tables:
- src
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- src
- TableScan
- alias: src
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key} {ds}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[value]]
- 1 [Column[value]]
- outputColumnNames: _col0, _col2
- Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+
+ Stage: Stage-6
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ src1
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ src1
+ TableScan
+ alias: src1
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col2}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col2
+ Position of Big Table: 0
 Stage: Stage-2
 Map Reduce
 Alias -> Map Operator Tree:
- file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10002
+ file:/tmp/liyintang/hive_2010-10-17_18-28-58_559_4154233080683989694/-mr-10002
 Select Operator
 expressions:
 expr: _col0
@@ -243,37 +251,11 @@
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 Local Work:
 Map Reduce Local Work
- Alias -> Map Local Tables:
- src1
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- src1
- TableScan
- alias: src1
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {_col2}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[key]]
- outputColumnNames: _col2
- Position of Big Table: 0
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 Stage: Stage-3
 Map Reduce
 Alias -> Map Operator Tree:
- file:/tmp/amarsri/hive_2010-10-03_23-32-11_566_3712502381018295609/-mr-10003
+ file:/tmp/liyintang/hive_2010-10-17_18-28-58_559_4154233080683989694/-mr-10003
 Select Operator
 expressions:
 expr: _col2
@@ -339,7 +321,7 @@
 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
-PREHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-17_18-28-58_899_5613342207704467139/-mr-10000
 POSTHOOK: query: select /*+MAPJOIN(src, src1) */ count(*) from srcpart join src src on (srcpart.value=src.value) join src src1 on (srcpart.key=src1.key) group by ds
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
@@ -347,6 +329,6 @@
 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
-POSTHOOK: Output: file:/tmp/amarsri/hive_2010-10-03_23-32-11_744_4150005716053861543/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-17_18-28-58_899_5613342207704467139/-mr-10000
 5308
 5308
Index: ql/src/test/results/clientpositive/mapjoin_subquery.q.out
===================================================================
--- ql/src/test/results/clientpositive/mapjoin_subquery.q.out (revision 1023076)
+++ ql/src/test/results/clientpositive/mapjoin_subquery.q.out (working copy)
@@ -16,10 +16,64 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value)))))
 STAGE DEPENDENCIES:
- Stage-1 is a root stage
+ Stage-4 is a root stage
+ Stage-1 depends on stages: Stage-4
 Stage-0 is a root stage
 STAGE PLANS:
+ Stage: Stage-4
+ Map Reduce Local Work
+ Alias -> Map Local Tables:
+ subq:x
+ Fetch Operator
+ limit: -1
+ z
+ Fetch Operator
+ limit: -1
+ Alias -> Map Local Operator Tree:
+ subq:x
+ TableScan
+ alias: x
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {key}
+ 1
+ handleSkewJoin: false
+ keys:
+ 0 [Column[key]]
+ 1 [Column[key]]
+ outputColumnNames: _col0
+ Position of Big Table: 1
+ z
+ TableScan
+ alias: z
+ Filter Operator
+ predicate:
+ expr: ((ds = '2008-04-08') and (hr = 11))
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (ds = '2008-04-08')
+ type: boolean
+ Filter Operator
+ predicate:
+ expr: (hr = 11)
+ type: boolean
+ Common Join Operator
+ condition map:
+ Inner Join 0 to 1
+ condition expressions:
+ 0 {_col0}
+ 1 {value}
+ handleSkewJoin: false
+ keys:
+ 0 [Column[_col0]]
+ 1 [Column[key]]
+ outputColumnNames: _col0, _col5
+ Position of Big Table: 0
+
 Stage: Stage-1
 Map Reduce
 Alias -> Map Operator Tree:
@@ -82,118 +136,6 @@
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Local Work:
 Map Reduce Local Work
- Alias -> Map Local Tables:
- subq:x
- Fetch Operator
- limit: -1
- z
- Fetch Operator
- limit: -1
- Alias -> Map Local Operator Tree:
- subq:x
- TableScan
- alias: x
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {key}
- 1
- handleSkewJoin: false
- keys:
- 0 [Column[key]]
- 1 [Column[key]]
- outputColumnNames: _col0
- Position of Big Table: 1
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Select Operator
- expressions:
- expr: _col0
- type: string
- outputColumnNames: _col0
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {_col0}
- 1 {value}
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[key]]
- outputColumnNames: _col0, _col5
- Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col5
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
- z
- TableScan
- alias: z
- Filter Operator
- predicate:
- expr: ((ds = '2008-04-08') and (hr = 11))
- type: boolean
- Filter Operator
- predicate:
- expr: (ds = '2008-04-08')
- type: boolean
- Filter Operator
- predicate:
- expr: (hr = 11)
- type: boolean
- Common Join Operator
- condition map:
- Inner Join 0 to 1
- condition expressions:
- 0 {_col0}
- 1 {value}
- handleSkewJoin: false
- keys:
- 0 [Column[_col0]]
- 1 [Column[key]]
- outputColumnNames: _col0, _col5
- Position of Big Table: 0
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col5
- Select Operator
- expressions:
- expr: _col0
- type: string
- expr: _col5
- type: string
- outputColumnNames: _col0, _col1
- File Output Operator
- compressed: false
- GlobalTableId: 0
- table:
- input format: org.apache.hadoop.mapred.TextInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
 Stage: Stage-0
 Fetch Operator
@@ -209,7 +151,7 @@
PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-14-14_212_7052833350565905154/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-04-47_409_128786361280541785/-mr-10000 POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value FROM (SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 @@ -219,7 +161,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-14-14_212_7052833350565905154/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-04-47_409_128786361280541785/-mr-10000 238 val_238 238 val_238 311 val_311 @@ -347,11 +289,65 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src1 x) (TOK_TABREF src y) (= (. (TOK_TABLE_OR_COL x) key) (. (TOK_TABLE_OR_COL y) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST x))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) key) key1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL x) value) value1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) key) key2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL y) value) value2)))) subq) (TOK_TABREF srcpart z) (and (and (= (. (TOK_TABLE_OR_COL subq) key1) (. (TOK_TABLE_OR_COL z) key)) (= (. (TOK_TABLE_OR_COL z) ds) '2008-04-08')) (= (. (TOK_TABLE_OR_COL z) hr) 11)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST z))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL subq) key1)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL z) value))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. 
(TOK_TABLE_OR_COL subq) key1))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-5 is a root stage + Stage-1 depends on stages: Stage-5 Stage-2 depends on stages: Stage-1 Stage-0 is a root stage STAGE PLANS: + Stage: Stage-5 + Map Reduce Local Work + Alias -> Map Local Tables: + subq:x + Fetch Operator + limit: -1 + z + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + subq:x + TableScan + alias: x + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0 + Position of Big Table: 1 + z + TableScan + alias: z + Filter Operator + predicate: + expr: ((ds = '2008-04-08') and (hr = 11)) + type: boolean + Filter Operator + predicate: + expr: (ds = '2008-04-08') + type: boolean + Filter Operator + predicate: + expr: (hr = 11) + type: boolean + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {_col0} + 1 {value} + handleSkewJoin: false + keys: + 0 [Column[_col0]] + 1 [Column[key]] + outputColumnNames: _col0, _col5 + Position of Big Table: 0 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -400,95 +396,11 @@ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - subq:x - Fetch Operator - limit: -1 - z - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - subq:x - TableScan - alias: x - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Select Operator - expressions: - expr: _col0 - type: string - outputColumnNames: _col0 - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - z - TableScan - alias: z - Filter Operator - predicate: - expr: ((ds = '2008-04-08') and (hr = 11)) - type: boolean - Filter Operator - predicate: - expr: (ds = '2008-04-08') - type: boolean - Filter Operator - predicate: - expr: (hr = 11) - type: boolean - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {_col0} - 1 {value} - handleSkewJoin: false - keys: - 0 [Column[_col0]] - 1 [Column[key]] - outputColumnNames: _col0, _col5 - Position of Big Table: 0 - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/jsichi/hive_2010-08-26_16-14-18_425_4598079873799409050/-mr-10002 + file:/tmp/liyintang/hive_2010-10-15_15-04-53_319_3769577272445368796/-mr-10002 Select Operator expressions: expr: _col0 @@ -538,7 +450,7 @@ PREHOOK: Input: default@src PREHOOK: Input: default@src1 PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-14-18_581_5217007605308966204/-mr-10000 +PREHOOK: Output: 
file:/tmp/liyintang/hive_2010-10-15_15-04-53_504_7399621961470089400/-mr-10000 POSTHOOK: query: SELECT /*+ MAPJOIN(z) */ subq.key1, z.value FROM (SELECT /*+ MAPJOIN(x) */ x.key as key1, x.value as value1, y.key as key2, y.value as value2 @@ -549,7 +461,7 @@ POSTHOOK: Input: default@src POSTHOOK: Input: default@src1 POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-14-18_581_5217007605308966204/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-04-53_504_7399621961470089400/-mr-10000 128 val_128 128 val_128 128 val_128 Index: ql/src/test/results/clientpositive/select_transform_hint.q.out =================================================================== --- ql/src/test/results/clientpositive/select_transform_hint.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/select_transform_hint.q.out (working copy) @@ -14,10 +14,34 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF src a) (TOK_TABREF src b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (TOK_TRANSFORM (TOK_EXPLIST (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL a) value)) TOK_SERDE TOK_RECORDWRITER '/bin/cat' TOK_SERDE TOK_RECORDREADER (TOK_ALIASLIST tkey tvalue)))))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-3 is a root stage + Stage-1 depends on stages: Stage-3 Stage-0 is a root stage STAGE PLANS: + Stage: Stage-3 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1 + Position of Big Table: 1 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -63,51 +87,6 @@ output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Select Operator - expressions: - expr: _col0 - type: string - expr: _col1 - type: string - outputColumnNames: _col0, _col1 - Transform Operator - command: /bin/cat - output info: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - File Output Operator - compressed: false - GlobalTableId: 0 - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat Stage: Stage-0 Fetch Operator @@ -120,14 +99,14 @@ on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-16_18-09-59_313_6684536786654133017/10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-28-13_781_8569397391886762548/-mr-10000 POSTHOOK: query: SELECT /*+MAPJOIN(a)*/ TRANSFORM(a.key, a.value) USING 
'/bin/cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-16_18-09-59_313_6684536786654133017/10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-28-13_781_8569397391886762548/-mr-10000 238 val_238 238 val_238 86 val_86 @@ -1247,14 +1226,14 @@ on a.key = b.key PREHOOK: type: QUERY PREHOOK: Input: default@src -PREHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-16_18-10-07_522_7541183111575060436/10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-28-19_149_1815239476955309395/-mr-10000 POSTHOOK: query: SELECT /*+STREAMTABLE(a)*/ TRANSFORM(a.key, a.value) USING '/bin/cat' AS (tkey, tvalue) FROM src a join src b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@src -POSTHOOK: Output: file:/Users/heyongqiang/Documents/workspace/Hive_RCFile/build/ql/scratchdir/hive_2010-03-16_18-10-07_522_7541183111575060436/10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-28-19_149_1815239476955309395/-mr-10000 0 val_0 0 val_0 0 val_0 Index: ql/src/test/results/clientpositive/semijoin.q.out =================================================================== --- ql/src/test/results/clientpositive/semijoin.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/semijoin.q.out (working copy) @@ -8,11 +8,11 @@ PREHOOK: query: select * from t1 sort by key PREHOOK: type: QUERY PREHOOK: Input: default@t1 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-30-50_167_4710362643764304912/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-54-48_453_7580246906147447007/-mr-10000 POSTHOOK: query: select * from t1 sort by key POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-30-50_167_4710362643764304912/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-54-48_453_7580246906147447007/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -34,11 +34,11 @@ PREHOOK: query: select * from t2 sort by key PREHOOK: type: QUERY PREHOOK: Input: default@t2 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-30-56_490_1305333221881884322/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-54-55_870_6093540767773075041/-mr-10000 POSTHOOK: query: select * from t2 sort by key POSTHOOK: type: QUERY POSTHOOK: Input: default@t2 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-30-56_490_1305333221881884322/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-54-55_870_6093540767773075041/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -62,11 +62,11 @@ PREHOOK: query: select * from t3 sort by key, value PREHOOK: type: QUERY PREHOOK: Input: default@t3 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-05_939_5708575653370225816/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-06_872_5514286542584251869/-mr-10000 POSTHOOK: query: select * from t3 sort by key, value POSTHOOK: type: QUERY POSTHOOK: Input: default@t3 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-05_939_5708575653370225816/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-06_872_5514286542584251869/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -97,11 +97,11 @@ PREHOOK: query: select * from t4 PREHOOK: type: QUERY PREHOOK: Input: default@t4 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-09_089_5243149004986287422/-mr-10000 +PREHOOK: Output: 
file:/tmp/liyintang/hive_2010-10-16_20-55-10_517_8043595902141289587/-mr-10000 POSTHOOK: query: select * from t4 POSTHOOK: type: QUERY POSTHOOK: Input: default@t4 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-09_089_5243149004986287422/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-10_517_8043595902141289587/-mr-10000 PREHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value PREHOOK: type: QUERY POSTHOOK: query: explain select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value @@ -185,7 +185,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/jsichi/hive_2010-08-26_16-31-09_221_5886345330918235176/-mr-10002 + file:/tmp/liyintang/hive_2010-10-16_20-55-10_683_6224851131670102704/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -217,12 +217,12 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-09_324_7400916651438089839/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-10_800_2513299296698295975/-mr-10000 POSTHOOK: query: select * from t1 a left semi join t2 b on a.key=b.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-09_324_7400916651438089839/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-10_800_2513299296698295975/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -312,7 +312,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/jsichi/hive_2010-08-26_16-31-15_398_6184531515584998832/-mr-10002 + file:/tmp/liyintang/hive_2010-10-16_20-55-17_836_5547579604305564817/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -344,12 +344,12 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t2 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-15_505_8084865171579203366/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-17_952_8107824929568696805/-mr-10000 POSTHOOK: query: select * from t2 a left semi join t1 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t2 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-15_505_8084865171579203366/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-17_952_8107824929568696805/-mr-10000 0 val_0 0 val_0 0 val_0 @@ -441,7 +441,7 @@ Stage: Stage-2 Map Reduce Alias -> Map Operator Tree: - file:/tmp/jsichi/hive_2010-08-26_16-31-21_590_8928586964867939357/-mr-10002 + file:/tmp/liyintang/hive_2010-10-16_20-55-25_023_4437489275956384567/-mr-10002 Reduce Output Operator key expressions: expr: _col0 @@ -473,12 +473,12 @@ PREHOOK: type: QUERY PREHOOK: Input: default@t1 PREHOOK: Input: default@t4 -PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-21_694_7949704596850731671/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-25_142_359943279087785463/-mr-10000 POSTHOOK: query: select * from t1 a left semi join t4 b on b.key=a.key sort by a.key, a.value POSTHOOK: type: QUERY POSTHOOK: Input: default@t1 POSTHOOK: Input: default@t4 -POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-21_694_7949704596850731671/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-25_142_359943279087785463/-mr-10000 PREHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by 
 a.value
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
@@ -568,7 +568,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-31-27_718_982050412756211456/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-55-32_006_1648677446700970744/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -596,12 +596,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-27_822_1350911842616975350/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-32_125_2593078127467219576/-mr-10000
 POSTHOOK: query: select a.value from t1 a left semi join t3 b on (b.key = a.key and b.key < '15') sort by a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-27_822_1350911842616975350/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-32_125_2593078127467219576/-mr-10000
 val_0
 val_0
 val_0
@@ -708,7 +708,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-31-33_923_9214921681958097012/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-55-40_123_4389202735836518462/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -740,12 +740,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-34_031_5185180184218172844/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-40_243_7850910287841643376/-mr-10000
 POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = b.key and b.value < "val_10" sort by a.key, a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-34_031_5185180184218172844/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-40_243_7850910287841643376/-mr-10000
 0	val_0
 0	val_0
 0	val_0
@@ -841,7 +841,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-31-40_153_883351923688146736/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-55-48_323_8119296433999603117/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -869,12 +869,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-40_268_6201543529188681953/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-48_445_7415332140514709784/-mr-10000
 POSTHOOK: query: select a.value from t1 a left semi join (select key from t3 where key > 5) b on a.key = b.key sort by a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-40_268_6201543529188681953/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-48_445_7415332140514709784/-mr-10000
 val_10
 val_8
 val_9
@@ -980,7 +980,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-31-46_312_3993050304389236152/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-55-56_537_2903311238836289590/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -1008,12 +1008,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-46_425_3381490323707009880/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-56_664_5020769161087651831/-mr-10000
 POSTHOOK: query: select a.value from t1 a left semi join (select key , value from t2 where key > 5) b on a.key = b.key and b.value <= 'val_20' sort by a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-46_425_3381490323707009880/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-55-56_664_5020769161087651831/-mr-10000
 PREHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 PREHOOK: type: QUERY
 POSTHOOK: query: explain select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
@@ -1110,7 +1110,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-31-52_549_6755746237152848554/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-04_059_355987900517518175/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -1142,12 +1142,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-52_659_5464756953472677859/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-04_183_8670610688043673425/-mr-10000
 POSTHOOK: query: select * from t2 a left semi join (select key , value from t1 where key > 2) b on a.key = b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-52_659_5464756953472677859/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-04_183_8670610688043673425/-mr-10000
 4	val_2
 8	val_4
 10	val_5
@@ -1161,11 +1161,47 @@
 (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_TABREF t3 a) (TOK_TABREF t1 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
   Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+              outputColumnNames: key
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: int
+                mode: hash
+                outputColumnNames: _col0
+                Common Join Operator
+                  condition map:
+                       Left Semi Join 0 to 1
+                  condition expressions:
+                    0 {key}
+                    1 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[_col0]]
+                  outputColumnNames: _col0
+                  Position of Big Table: 0
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
@@ -1192,49 +1228,11 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            b 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            b 
-              TableScan
-                alias: b
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                  outputColumnNames: key
-                  Group By Operator
-                    bucketGroup: false
-                    keys:
-                          expr: key
-                          type: int
-                    mode: hash
-                    outputColumnNames: _col0
-                    Common Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                      condition expressions:
-                        0 {key}
-                        1 
-                      handleSkewJoin: false
-                      keys:
-                        0 [Column[key]]
-                        1 [Column[_col0]]
-                      outputColumnNames: _col0
-                      Position of Big Table: 0
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 0
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-31-58_752_2377854145526588776/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-12_336_1953660551209643271/-mr-10002 
           Select Operator
             expressions:
                   expr: _col0
@@ -1272,12 +1270,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-58_854_7200265512841921451/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-12_453_4269654069294497779/-mr-10000
 POSTHOOK: query: select /*+ mapjoin(b) */ a.key from t3 a left semi join t1 b on a.key = b.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-31-58_854_7200265512841921451/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-12_453_4269654069294497779/-mr-10000
 0
 0
 0
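[Reviewer note: the plan change above is easiest to see by re-running the test query itself. A minimal HiveQL sketch, taken verbatim from this golden file; only the comment is added:]

    explain
    select /*+ mapjoin(b) */ a.key
    from t3 a left semi join t1 b on a.key = b.key
    sort by a.key;
    -- With this patch the hash-table build for the small side b is planned
    -- as its own root stage (Stage-4, Map Reduce Local Work), and Stage-1
    -- now depends on it, instead of the local work being embedded inside
    -- Stage-1's Local Work as before.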
@@ -1380,7 +1378,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-04_643_2338407986226403980/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-19_517_1787680149236807917/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -1412,12 +1410,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-04_744_623541634907516161/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-19_645_8738010680958797763/-mr-10000
 POSTHOOK: query: select * from t1 a left semi join t2 b on a.key = 2*b.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-04_744_623541634907516161/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-19_645_8738010680958797763/-mr-10000
 0	val_0
 0	val_0
 0	val_0
@@ -1528,7 +1526,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-10_767_3479460229051595720/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-27_826_6825405805902610028/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -1565,13 +1563,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-10_884_4563457848546641930/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-27_976_3972434369204782156/-mr-10000
 POSTHOOK: query: select * from t1 a join t2 b on a.key = b.key left semi join t3 c on b.key = c.key sort by a.key, a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-10_884_4563457848546641930/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-27_976_3972434369204782156/-mr-10000
 0	val_0	0	val_0
 0	val_0	0	val_0
 0	val_0	0	val_0
@@ -1681,7 +1679,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-18_210_7397281763659818293/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-36_330_6779914873043487516/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -1713,12 +1711,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-18_313_6529433048035998250/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-36_446_531074999473537299/-mr-10000
 POSTHOOK: query: select * from t3 a left semi join t1 b on a.key = b.key and a.value=b.value sort by a.key, a.value
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-18_313_6529433048035998250/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-36_446_531074999473537299/-mr-10000
 0	val_0
 0	val_0
 0	val_0
@@ -1741,11 +1739,83 @@
 (TOK_QUERY (TOK_FROM (TOK_LEFTSEMIJOIN (TOK_LEFTSEMIJOIN (TOK_TABREF t3 a) (TOK_TABREF t1 b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key))) (TOK_TABREF t2 c) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL c) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b c))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key))) (TOK_SORTBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)))))
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-5 is a root stage
+  Stage-1 depends on stages: Stage-5
   Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b 
+          Fetch Operator
+            limit: -1
+        c 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b 
+          TableScan
+            alias: b
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+              outputColumnNames: key
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: int
+                mode: hash
+                outputColumnNames: _col0
+                Common Join Operator
+                  condition map:
+                       Left Semi Join 0 to 1
+                       Left Semi Join 0 to 2
+                  condition expressions:
+                    0 {key}
+                    1 
+                    2 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[_col0]]
+                    2 [Column[_col0]]
+                  outputColumnNames: _col0
+                  Position of Big Table: 0
+        c 
+          TableScan
+            alias: c
+            Select Operator
+              expressions:
+                    expr: key
+                    type: int
+              outputColumnNames: key
+              Group By Operator
+                bucketGroup: false
+                keys:
+                      expr: key
+                      type: int
+                mode: hash
+                outputColumnNames: _col0
+                Common Join Operator
+                  condition map:
+                       Left Semi Join 0 to 1
+                       Left Semi Join 0 to 2
+                  condition expressions:
+                    0 {key}
+                    1 
+                    2 
+                  handleSkewJoin: false
+                  keys:
+                    0 [Column[key]]
+                    1 [Column[_col0]]
+                    2 [Column[_col0]]
+                  outputColumnNames: _col0
+                  Position of Big Table: 0
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
@@ -1775,91 +1845,11 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            b 
-              Fetch Operator
-                limit: -1
-            c 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            b 
-              TableScan
-                alias: b
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                  outputColumnNames: key
-                  Group By Operator
-                    bucketGroup: false
-                    keys:
-                          expr: key
-                          type: int
-                    mode: hash
-                    outputColumnNames: _col0
-                    Common Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                           Left Semi Join 0 to 2
-                      condition expressions:
-                        0 {key}
-                        1 
-                        2 
-                      handleSkewJoin: false
-                      keys:
-                        0 [Column[key]]
-                        1 [Column[_col0]]
-                        2 [Column[_col0]]
-                      outputColumnNames: _col0
-                      Position of Big Table: 0
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 0
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            c 
-              TableScan
-                alias: c
-                Select Operator
-                  expressions:
-                        expr: key
-                        type: int
-                  outputColumnNames: key
-                  Group By Operator
-                    bucketGroup: false
-                    keys:
-                          expr: key
-                          type: int
-                    mode: hash
-                    outputColumnNames: _col0
-                    Common Join Operator
-                      condition map:
-                           Left Semi Join 0 to 1
-                           Left Semi Join 0 to 2
-                      condition expressions:
-                        0 {key}
-                        1 
-                        2 
-                      handleSkewJoin: false
-                      keys:
-                        0 [Column[key]]
-                        1 [Column[_col0]]
-                        2 [Column[_col0]]
-                      outputColumnNames: _col0
-                      Position of Big Table: 0
-                      File Output Operator
-                        compressed: false
-                        GlobalTableId: 0
-                        table:
-                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-24_354_7818817395350959657/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-43_461_1825225512272201729/-mr-10002 
          Select Operator
            expressions:
                  expr: _col0
@@ -1898,13 +1888,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-24_474_1477633509986082418/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-43_597_7730476012893391191/-mr-10000
 POSTHOOK: query: select /*+ mapjoin(b, c) */ a.key from t3 a left semi join t1 b on a.key = b.key left semi join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-24_474_1477633509986082418/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-43_597_7730476012893391191/-mr-10000
 0
 0
 0
@@ -2012,7 +2002,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-30_284_6051624992600368837/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-51_193_4971480791101556085/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -2041,13 +2031,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-30_400_7335870280927099010/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-51_329_5949380496953796907/-mr-10000
 POSTHOOK: query: select a.key from t3 a left outer join t1 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-30_400_7335870280927099010/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-51_329_5949380496953796907/-mr-10000
 0
 0
 0
@@ -2167,7 +2157,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-37_758_3950677286047860567/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-56-59_679_8191924060733337482/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -2196,13 +2186,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-37_871_2454366781942838266/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-59_811_3831038390861954254/-mr-10000
 POSTHOOK: query: select a.key from t1 a right outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-37_871_2454366781942838266/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-56-59_811_3831038390861954254/-mr-10000
 NULL
 NULL
 NULL
@@ -2325,7 +2315,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-45_262_5992955238945717538/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-57-08_142_4862659123810822662/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -2354,13 +2344,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-45_380_6139341223545736201/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-08_274_1271914331764289510/-mr-10000
 POSTHOOK: query: select a.key from t1 a full outer join t3 b on a.key = b.key left semi join t2 c on b.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-45_380_6139341223545736201/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-08_274_1271914331764289510/-mr-10000
 NULL
 NULL
 NULL
@@ -2483,7 +2473,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-32-52_808_8193104458011818678/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-57-16_618_1799543107598292752/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -2512,13 +2502,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-52_924_3358318124911522745/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-16_749_4595962962752839059/-mr-10000
 POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-32-52_924_3358318124911522745/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-16_749_4595962962752839059/-mr-10000
 0
 0
 0
@@ -2641,7 +2631,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-33-00_334_3947388254240326821/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-57-25_108_1492275708770566570/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -2670,13 +2660,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-33-00_450_7944705166831975761/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-25_243_1263591880561616930/-mr-10000
 POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key right outer join t1 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-33-00_450_7944705166831975761/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-25_243_1263591880561616930/-mr-10000
 NULL
 NULL
 NULL
@@ -2801,7 +2791,7 @@
   Stage: Stage-2
     Map Reduce
      Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-33-07_801_4655936433608376440/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-16_20-57-33_674_2234020911596435252/-mr-10002 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -2830,13 +2820,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-33-07_917_7933086183001476835/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-33_808_3288750373349451656/-mr-10000
 POSTHOOK: query: select a.key from t3 a left semi join t1 b on a.key = b.key full outer join t2 c on a.key = c.key sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-33-07_917_7933086183001476835/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-33_808_3288750373349451656/-mr-10000
 NULL
 NULL
 NULL
@@ -3005,7 +2995,7 @@
   Stage: Stage-3
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/jsichi/hive_2010-08-26_16-33-15_319_6054233081828350725/-mr-10003 
+        file:/tmp/liyintang/hive_2010-10-16_20-57-42_172_571318165223667427/-mr-10003 
           Reduce Output Operator
             key expressions:
                   expr: _col0
@@ -3034,13 +3024,13 @@
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
-PREHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-33-15_444_3464952629462504820/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-42_311_595775460689974599/-mr-10000
 POSTHOOK: query: select a.key from t3 a left semi join t2 b on a.key = b.key left outer join t1 c on a.value = c.value sort by a.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
-POSTHOOK: Output: file:/tmp/jsichi/hive_2010-08-26_16-33-15_444_3464952629462504820/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-16_20-57-42_311_595775460689974599/-mr-10000
 0
 0
 0
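[Reviewer note: the skewjoin.q.out changes that follow show the same restructuring applied to skew-join plans: the Conditional Operator stage now resolves to a dedicated local-work stage that builds the hash table before the follow-up map-join stage runs. A rough HiveQL sketch of the settings under which such plans are produced; the exact values are assumptions based on the standard skewjoin.q test and are not part of this diff:]

    set hive.optimize.skewjoin = true;
    set hive.skewjoin.key = 2;   -- assumed threshold; skewed keys spill and
                                 -- are re-joined by the conditional stages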
Index: ql/src/test/results/clientpositive/skewjoin.q.out
===================================================================
--- ql/src/test/results/clientpositive/skewjoin.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/skewjoin.q.out	(working copy)
@@ -56,8 +56,9 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-5 depends on stages: Stage-1 , consists of Stage-4
-  Stage-4
+  Stage-5 depends on stages: Stage-1 , consists of Stage-6
+  Stage-6
+  Stage-4 depends on stages: Stage-6
   Stage-0 depends on stages: Stage-1, Stage-4
   Stage-2 depends on stages: Stage-0
@@ -130,6 +131,27 @@
   Stage: Stage-5
     Conditional Operator
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        1 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 {0_VALUE_0}
+              1 {1_VALUE_0}
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0]]
+              1 [Column[joinkey0]]
+            outputColumnNames: _col0, _col5
+            Position of Big Table: 0
   Stage: Stage-4
     Map Reduce
       Alias -> Map Operator Tree:
@@ -170,46 +192,6 @@
               name: dest_j1
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            1 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            1 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 {0_VALUE_0}
-                  1 {1_VALUE_0}
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0]]
-                  1 [Column[joinkey0]]
-                outputColumnNames: _col0, _col5
-                Position of Big Table: 0
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: string
-                        expr: _col5
-                        type: string
-                  outputColumnNames: _col0, _col1
-                  Select Operator
-                    expressions:
-                          expr: UDFToInteger(_col0)
-                          type: int
-                          expr: _col1
-                          type: string
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 1
-                      table:
-                          input format: org.apache.hadoop.mapred.TextInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                          name: dest_j1
   Stage: Stage-0
     Move Operator
@@ -240,11 +222,11 @@
 PREHOOK: query: SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1
 PREHOOK: type: QUERY
 PREHOOK: Input: default@dest_j1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-32-20_088_265990891413321286/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-04-50_849_4926018945681458170/-mr-10000
 POSTHOOK: query: SELECT sum(hash(key)), sum(hash(value)) FROM dest_j1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@dest_j1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-32-20_088_265990891413321286/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-04-50_849_4926018945681458170/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 278697	101852390308
@@ -394,7 +376,7 @@
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
 PREHOOK: Input: default@t4
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-32-59_862_2469201922490952543/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-30_958_5269065287490367280/-mr-10000
 POSTHOOK: query: SELECT /*+ STREAMTABLE(a) */ *
 FROM T1 a JOIN T2 b ON a.key = b.key
           JOIN T3 c ON b.key = c.key
@@ -404,7 +386,7 @@
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-32-59_862_2469201922490952543/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-30_958_5269065287490367280/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 2	12	2	22	2	12	2	12
@@ -554,7 +536,7 @@
 PREHOOK: Input: default@t2
 PREHOOK: Input: default@t3
 PREHOOK: Input: default@t4
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-33-05_555_320549944562504832/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-36_527_8632422920697698077/-mr-10000
 POSTHOOK: query: SELECT /*+ STREAMTABLE(a,c) */ *
 FROM T1 a JOIN T2 b ON a.key = b.key
           JOIN T3 c ON b.key = c.key
@@ -564,7 +546,7 @@
 POSTHOOK: Input: default@t2
 POSTHOOK: Input: default@t3
 POSTHOOK: Input: default@t4
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-33-05_555_320549944562504832/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-36_527_8632422920697698077/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 2	12	2	22	2	12	2	12
@@ -654,7 +636,7 @@
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_17-33-11_763_3320930002757015267/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-19_10-05-42_041_7344594961437099384/-mr-10002 
           Reduce Output Operator
             sort order: 
             tag: -1
@@ -699,12 +681,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-33-11_947_4948549999809455027/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-42_198_1840888490716635874/-mr-10000
 POSTHOOK: query: FROM T1 a JOIN src c ON c.key+1=a.key
 SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key))
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-33-11_947_4948549999809455027/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-42_198_1840888490716635874/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 198	6274	194
@@ -729,8 +711,9 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-5 depends on stages: Stage-1 , consists of Stage-4
-  Stage-4
+  Stage-5 depends on stages: Stage-1 , consists of Stage-6
+  Stage-6
+  Stage-4 depends on stages: Stage-6
   Stage-2 depends on stages: Stage-1, Stage-4
   Stage-0 is a root stage
@@ -812,6 +795,27 @@
   Stage: Stage-5
     Conditional Operator
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        1 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 
+              1 {1_VALUE_0} {1_VALUE_1}
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0]]
+              1 [Column[joinkey0]]
+            outputColumnNames: _col2, _col3
+            Position of Big Table: 0
   Stage: Stage-4
     Map Reduce
       Alias -> Map Operator Tree:
@@ -850,49 +854,11 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            1 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            1 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 {1_VALUE_0} {1_VALUE_1}
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0]]
-                  1 [Column[joinkey0]]
-                outputColumnNames: _col2, _col3
-                Position of Big Table: 0
-                Select Operator
-                  expressions:
-                        expr: _col2
-                        type: string
-                        expr: _col3
-                        type: string
-                  outputColumnNames: _col2, _col3
-                  Group By Operator
-                    aggregations:
-                          expr: sum(hash(_col2))
-                          expr: sum(hash(_col3))
-                    bucketGroup: false
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_17-33-21_233_135416400611149220/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-19_10-05-50_405_6110056714768738373/-mr-10002 
           Reduce Output Operator
             sort order: 
            tag: -1
@@ -936,7 +902,7 @@
 SELECT sum(hash(Y.key)), sum(hash(Y.value))
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-33-21_509_6518293255572217703/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-50_724_433499985548447694/-mr-10000
 POSTHOOK: query: FROM 
 (SELECT src.* FROM src) x
 JOIN 
 (SELECT src.* FROM src) Y
 ON (x.key = Y.key)
@@ -945,7 +911,7 @@
 SELECT sum(hash(Y.key)), sum(hash(Y.value))
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-33-21_509_6518293255572217703/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-05-50_724_433499985548447694/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 44481300	101852390308
@@ -970,8 +936,9 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-5 depends on stages: Stage-1 , consists of Stage-4
-  Stage-4
+  Stage-5 depends on stages: Stage-1 , consists of Stage-6
+  Stage-6
+  Stage-4 depends on stages: Stage-6
   Stage-2 depends on stages: Stage-1, Stage-4
   Stage-0 is a root stage
@@ -1063,6 +1030,27 @@
   Stage: Stage-5
     Conditional Operator
+  Stage: Stage-6
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        1 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        1 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+            condition expressions:
+              0 
+              1 {1_VALUE_0} {1_VALUE_1}
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0], Column[joinkey1]]
+              1 [Column[joinkey0], Column[joinkey1]]
+            outputColumnNames: _col2, _col3
+            Position of Big Table: 0
   Stage: Stage-4
     Map Reduce
       Alias -> Map Operator Tree:
@@ -1101,49 +1089,11 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            1 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            1 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                condition expressions:
-                  0 
-                  1 {1_VALUE_0} {1_VALUE_1}
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0], Column[joinkey1]]
-                  1 [Column[joinkey0], Column[joinkey1]]
-                outputColumnNames: _col2, _col3
-                Position of Big Table: 0
-                Select Operator
-                  expressions:
-                        expr: _col2
-                        type: string
-                        expr: _col3
-                        type: string
-                  outputColumnNames: _col2, _col3
-                  Group By Operator
-                    aggregations:
-                          expr: sum(hash(_col2))
-                          expr: sum(hash(_col3))
-                    bucketGroup: false
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-2
     Map Reduce
      Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_17-35-01_721_1099039694801681089/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-19_10-06-47_474_6216702342566228377/-mr-10002 
           Reduce Output Operator
             sort order: 
             tag: -1
@@ -1187,7 +1137,7 @@
 SELECT sum(hash(Y.key)), sum(hash(Y.value))
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-35-02_103_1296780027454091580/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-06-47_800_6350809369258245956/-mr-10000
 POSTHOOK: query: FROM 
 (SELECT src.* FROM src) x
 JOIN 
 (SELECT src.* FROM src) Y
 ON (x.key = Y.key)
@@ -1196,7 +1146,7 @@
 SELECT sum(hash(Y.key)), sum(hash(Y.value))
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-35-02_103_1296780027454091580/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-06-47_800_6350809369258245956/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 NULL	NULL
@@ -1229,10 +1179,12 @@
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-7 depends on stages: Stage-1 , consists of Stage-5, Stage-6
-  Stage-5
+  Stage-7 depends on stages: Stage-1 , consists of Stage-8, Stage-9
+  Stage-8
+  Stage-5 depends on stages: Stage-8
   Stage-2 depends on stages: Stage-1, Stage-5, Stage-6
-  Stage-6
+  Stage-9
+  Stage-6 depends on stages: Stage-9
   Stage-0 is a root stage
 STAGE PLANS:
@@ -1349,6 +1301,49 @@
   Stage: Stage-7
     Conditional Operator
+  Stage: Stage-8
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        1 
+          Fetch Operator
+            limit: -1
+        2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        1 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+                 Inner Join 0 to 2
+            condition expressions:
+              0 {0_VALUE_0}
+              1 {1_VALUE_0}
+              2 
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0]]
+              1 [Column[joinkey0]]
+              2 [Column[joinkey0]]
+            outputColumnNames: _col0, _col3
+            Position of Big Table: 0
+        2 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+                 Inner Join 0 to 2
+            condition expressions:
+              0 {0_VALUE_0}
+              1 {1_VALUE_0}
+              2 
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0]]
+              1 [Column[joinkey0]]
+              2 [Column[joinkey0]]
+            outputColumnNames: _col0, _col3
+            Position of Big Table: 0
   Stage: Stage-5
     Map Reduce
       Alias -> Map Operator Tree:
@@ -1390,91 +1385,11 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            1 
-              Fetch Operator
-                limit: -1
-            2 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            1 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                condition expressions:
-                  0 {0_VALUE_0}
-                  1 {1_VALUE_0}
-                  2 
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0]]
-                  1 [Column[joinkey0]]
-                  2 [Column[joinkey0]]
-                outputColumnNames: _col0, _col3
-                Position of Big Table: 0
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: string
-                        expr: _col3
-                        type: string
-                  outputColumnNames: _col0, _col3
-                  Group By Operator
-                    aggregations:
-                          expr: sum(hash(_col0))
-                          expr: sum(hash(_col3))
-                    bucketGroup: false
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            2 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                condition expressions:
-                  0 {0_VALUE_0}
-                  1 {1_VALUE_0}
-                  2 
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0]]
-                  1 [Column[joinkey0]]
-                  2 [Column[joinkey0]]
-                outputColumnNames: _col0, _col3
-                Position of Big Table: 0
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: string
-                        expr: _col3
-                        type: string
-                  outputColumnNames: _col0, _col3
-                  Group By Operator
-                    aggregations:
-                          expr: sum(hash(_col0))
-                          expr: sum(hash(_col3))
-                    bucketGroup: false
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_17-36-00_558_8264505707294012856/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-19_10-07-40_748_888888270570061665/-mr-10002 
           Reduce Output Operator
             sort order: 
             tag: -1
@@ -1505,6 +1420,49 @@
               input format: org.apache.hadoop.mapred.TextInputFormat
               output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+  Stage: Stage-9
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        0 
+          Fetch Operator
+            limit: -1
+        2 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        0 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+                 Inner Join 0 to 2
+            condition expressions:
+              0 {0_VALUE_0}
+              1 {1_VALUE_0}
+              2 
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0]]
+              1 [Column[joinkey0]]
+              2 [Column[joinkey0]]
+            outputColumnNames: _col0, _col3
+            Position of Big Table: 1
+        2 
+          Common Join Operator
+            condition map:
+                 Inner Join 0 to 1
+                 Inner Join 0 to 2
+            condition expressions:
+              0 {0_VALUE_0}
+              1 {1_VALUE_0}
+              2 
+            handleSkewJoin: false
+            keys:
+              0 [Column[joinkey0]]
+              1 [Column[joinkey0]]
+              2 [Column[joinkey0]]
+            outputColumnNames: _col0, _col3
+            Position of Big Table: 1
   Stage: Stage-6
     Map Reduce
       Alias -> Map Operator Tree:
@@ -1546,86 +1504,6 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            0 
-              Fetch Operator
-                limit: -1
-            2 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            0 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                condition expressions:
-                  0 {0_VALUE_0}
-                  1 {1_VALUE_0}
-                  2 
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0]]
-                  1 [Column[joinkey0]]
-                  2 [Column[joinkey0]]
-                outputColumnNames: _col0, _col3
-                Position of Big Table: 1
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: string
-                        expr: _col3
-                        type: string
-                  outputColumnNames: _col0, _col3
-                  Group By Operator
-                    aggregations:
-                          expr: sum(hash(_col0))
-                          expr: sum(hash(_col3))
-                    bucketGroup: false
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-            2 
-              Common Join Operator
-                condition map:
-                     Inner Join 0 to 1
-                     Inner Join 0 to 2
-                condition expressions:
-                  0 {0_VALUE_0}
-                  1 {1_VALUE_0}
-                  2 
-                handleSkewJoin: false
-                keys:
-                  0 [Column[joinkey0]]
-                  1 [Column[joinkey0]]
-                  2 [Column[joinkey0]]
-                outputColumnNames: _col0, _col3
-                Position of Big Table: 1
-                Select Operator
-                  expressions:
-                        expr: _col0
-                        type: string
-                        expr: _col3
-                        type: string
-                  outputColumnNames: _col0, _col3
-                  Group By Operator
-                    aggregations:
-                          expr: sum(hash(_col0))
-                          expr: sum(hash(_col3))
-                    bucketGroup: false
-                    mode: hash
-                    outputColumnNames: _col0, _col1
-                    File Output Operator
-                      compressed: false
-                      GlobalTableId: 0
-                      table:
-                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-0
     Fetch Operator
@@ -1643,7 +1521,7 @@
 ON src1.c1 = src3.c5 AND src3.c5 < 80
 PREHOOK: type: QUERY
 PREHOOK: Input: default@src
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-36-01_143_2592368341555694197/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-07-41_326_108115999211054386/-mr-10000
 POSTHOOK: query: SELECT sum(hash(src1.c1)), sum(hash(src2.c4)) 
 FROM
 (SELECT src.key as c1, src.value as c2 from src) src1
@@ -1655,7 +1533,7 @@
 ON src1.c1 = src3.c5 AND src3.c5 < 80
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@src
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-36-01_143_2592368341555694197/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-07-41_326_108115999211054386/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 293143	-136853010385
@@ -1671,11 +1549,35 @@
 (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF T1 k) (TOK_TABREF T1 v) (= (+ (. (TOK_TABLE_OR_COL k) key) 1) (. (TOK_TABLE_OR_COL v) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST v))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL k) key)))) (TOK_SELEXPR (TOK_FUNCTION sum (TOK_FUNCTION hash (. (TOK_TABLE_OR_COL v) val)))))))
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-4 is a root stage
+  Stage-1 depends on stages: Stage-4
   Stage-2 depends on stages: Stage-1
   Stage-0 is a root stage
 STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        v 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        v 
+          TableScan
+            alias: v
+            Common Join Operator
+              condition map:
+                   Left Outer Join0 to 1
+              condition expressions:
+                0 {key}
+                1 {val}
+              handleSkewJoin: false
+              keys:
+                0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key], Const int 1()]
+                1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
+              outputColumnNames: _col0, _col5
+              Position of Big Table: 0
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
@@ -1702,37 +1604,11 @@
             output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            v 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            v 
-              TableScan
-                alias: v
-                Common Join Operator
-                  condition map:
-                       Left Outer Join0 to 1
-                  condition expressions:
-                    0 {key}
-                    1 {val}
-                  handleSkewJoin: false
-                  keys:
-                    0 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key], Const int 1()]
-                    1 [class org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge(Column[key]()]
-                  outputColumnNames: _col0, _col5
-                  Position of Big Table: 0
-                  File Output Operator
-                    compressed: false
-                    GlobalTableId: 0
-                    table:
-                        input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                        output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
   Stage: Stage-2
     Map Reduce
       Alias -> Map Operator Tree:
-        file:/tmp/nzhang/hive_2010-09-14_17-37-07_971_7104147537843225861/-mr-10002 
+        file:/tmp/liyintang/hive_2010-10-19_10-08-41_351_6278160534430301243/-mr-10002 
           Select Operator
             expressions:
                   expr: _col0
@@ -1792,55 +1668,55 @@
 PREHOOK: query: SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER JOIN T1 v ON k.key+1=v.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-08_099_6986157948200697425/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-08-41_490_8646731125667777254/-mr-10000
 POSTHOOK: query: SELECT /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) FROM T1 k LEFT OUTER JOIN T1 v ON k.key+1=v.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-08_099_6986157948200697425/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-08-41_490_8646731125667777254/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 372	6320
 PREHOOK: query: select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-15_103_7565149712452268536/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-08-49_667_1957853727227558594/-mr-10000
 POSTHOOK: query: select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.val
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-15_103_7565149712452268536/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-08-49_667_1957853727227558594/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 NULL	NULL
 PREHOOK: query: select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-21_720_2771757907682080403/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-08-57_949_3421678461487010281/-mr-10000
 POSTHOOK: query: select /*+ mapjoin(k)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-21_720_2771757907682080403/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-08-57_949_3421678461487010281/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 429	12643
 PREHOOK: query: select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-28_452_6093434418535192386/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-06_106_9105927732182868271/-mr-10000
 POSTHOOK: query: select sum(hash(k.key)), sum(hash(v.val)) from T1 k join T1 v on k.key=v.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-28_452_6093434418535192386/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-06_106_9105927732182868271/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 429	12643
 PREHOOK: query: select count(1) from T1 a join T1 b on a.key = b.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-38_889_1047609014892238447/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-17_819_6956454420254919929/-mr-10000
 POSTHOOK: query: select count(1) from T1 a join T1 b on a.key = b.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-38_889_1047609014892238447/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-17_819_6956454420254919929/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 8
@@ -1848,12 +1724,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-49_221_8506210790545243082/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-29_643_1471344657735837027/-mr-10000
 POSTHOOK: query: FROM T1 a LEFT OUTER JOIN T2 c ON c.key+1=a.key SELECT sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key))
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-49_221_8506210790545243082/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-29_643_1471344657735837027/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 317	9462	50
@@ -1861,12 +1737,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-56_340_6168859629052833187/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-37_719_1766810325799201220/-mr-10000
 POSTHOOK: query: FROM T1 a RIGHT OUTER JOIN T2 c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key))
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-37-56_340_6168859629052833187/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-37_719_1766810325799201220/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 51	1570	318
@@ -1874,12 +1750,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-03_577_4249269048370838540/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-45_768_5626269091454711692/-mr-10000
 POSTHOOK: query: FROM T1 a FULL OUTER JOIN T2 c ON c.key+1=a.key SELECT /*+ STREAMTABLE(a) */ sum(hash(a.key)), sum(hash(a.val)), sum(hash(c.key))
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-03_577_4249269048370838540/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-45_768_5626269091454711692/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 317	9462	318
@@ -1887,12 +1763,12 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-10_756_6811760235799500968/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-53_877_4064218124982922608/-mr-10000
 POSTHOOK: query: SELECT sum(hash(src1.key)), sum(hash(src1.val)), sum(hash(src2.key)) FROM T1 src1 LEFT OUTER JOIN T2 src2 ON src1.key+1 = src2.key RIGHT OUTER JOIN T2 src3 ON src2.key = src3.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-10_756_6811760235799500968/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-09-53_877_4064218124982922608/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 370	11003	377
@@ -1900,23 +1776,23 @@
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
 PREHOOK: Input: default@t2
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-18_994_7191347978897240811/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-10-00_997_7881612208030962780/-mr-10000
 POSTHOOK: query: SELECT sum(hash(src1.key)), sum(hash(src1.val)), sum(hash(src2.key)) FROM T1 src1 JOIN T2 src2 ON src1.key+1 = src2.key JOIN T2 src3 ON src2.key = src3.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
 POSTHOOK: Input: default@t2
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-18_994_7191347978897240811/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-10-00_997_7881612208030962780/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 370	11003	377
 PREHOOK: query: select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key
 PREHOOK: type: QUERY
 PREHOOK: Input: default@t1
-PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-35_614_8013112114265683326/-mr-10000
+PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-10-21_614_76890166910035533/-mr-10000
 POSTHOOK: query: select /*+ mapjoin(v)*/ sum(hash(k.key)), sum(hash(v.val)) from T1 k left outer join T1 v on k.key+1=v.key
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@t1
-POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-14_17-38-35_614_8013112114265683326/-mr-10000
+POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-19_10-10-21_614_76890166910035533/-mr-10000
 POSTHOOK: Lineage: dest_j1.key EXPRESSION [(src)src1.FieldSchema(name:key, type:string, comment:default), ]
 POSTHOOK: Lineage: dest_j1.value SIMPLE [(src)src2.FieldSchema(name:value, type:string, comment:default), ]
 372	6320
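[Reviewer note: the stats11.q.out changes below exercise a bucketed map join, where the new root local-work stage also carries the Bucket Mapjoin Context (per-bucket file name mappings). A minimal HiveQL sketch of the query under test, reconstructed from the AST printed in the diff; the set statement is an assumption based on the standard bucket-map-join tests and is not part of this patch:]

    set hive.optimize.bucketmapjoin = true;  -- assumed test setting
    insert overwrite table bucketmapjoin_tmp_result
    select /*+ mapjoin(b) */ a.key, a.value, b.value
    from srcbucket_mapjoin a join srcbucket_mapjoin_part b on a.key = b.key
    where b.ds = "2008-04-08";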
Index: ql/src/test/results/clientpositive/stats11.q.out
===================================================================
--- ql/src/test/results/clientpositive/stats11.q.out	(revision 1023076)
+++ ql/src/test/results/clientpositive/stats11.q.out	(working copy)
@@ -84,7 +84,8 @@
 (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. (TOK_TABLE_OR_COL b) ds) "2008-04-08"))))
 STAGE DEPENDENCIES:
-  Stage-1 is a root stage
+  Stage-7 is a root stage
+  Stage-1 depends on stages: Stage-7
   Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3
   Stage-4
   Stage-0 depends on stages: Stage-4, Stage-3
@@ -92,6 +93,43 @@
   Stage-3
 STAGE PLANS:
+  Stage: Stage-7
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        b 
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        b 
+          TableScan
+            alias: b
+            GatherStats: false
+            Filter Operator
+              isSamplingPred: false
+              predicate:
+                  expr: (ds = '2008-04-08')
+                  type: boolean
+              Common Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key} {value}
+                  1 {value} {ds}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[key]]
+                  1 [Column[key]]
+                outputColumnNames: _col0, _col1, _col5, _col6
+                Position of Big Table: 0
+      Bucket Mapjoin Context:
+          Alias Bucket Base File Name Mapping:
+              b {srcbucket20.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket21.txt=[srcbucket21.txt, srcbucket23.txt]}
+          Alias Bucket File Name Mapping:
+              b {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]}
+          Alias Bucket Output File Name Mapping:
+              pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0
+              pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1
   Stage: Stage-1
     Map Reduce
       Alias -> Map Operator Tree:
@@ -139,9 +177,9 @@
             File Output Operator
               compressed: false
               GlobalTableId: 1
-              directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002
+              directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10002
               NumFilesPerFileSink: 1
-              Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10000/
+              Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10000/
               table:
                   input format: org.apache.hadoop.mapred.TextInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -151,12 +189,12 @@
                     columns.types string:string:string
                     file.inputformat org.apache.hadoop.mapred.TextInputFormat
                     file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                    location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
+                    location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
                     name bucketmapjoin_tmp_result
                     serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                     serialization.format 1
                     serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                    transient_lastDdlTime 1284665725
+                    transient_lastDdlTime 1287181934
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                   name: bucketmapjoin_tmp_result
               TotalFiles: 1
@@ -164,96 +202,11 @@
               MultiFileSpray: false
       Local Work:
         Map Reduce Local Work
-          Alias -> Map Local Tables:
-            b 
-              Fetch Operator
-                limit: -1
-          Alias -> Map Local Operator Tree:
-            b 
-              TableScan
-                alias: b
-                GatherStats: false
-                Filter Operator
-                  isSamplingPred: false
-                  predicate:
-                      expr: (ds = '2008-04-08')
-                      type: boolean
-                  Common Join Operator
-                    condition map:
-                         Inner Join 0 to 1
-                    condition expressions:
-                      0 {key} {value}
-                      1 {value} {ds}
-                    handleSkewJoin: false
-                    keys:
-                      0 [Column[key]]
-                      1 [Column[key]]
-                    outputColumnNames: _col0, _col1, _col5, _col6
-                    Position of Big Table: 0
-                    Select Operator
-                      expressions:
-                            expr: _col0
-                            type: int
-                            expr: _col1
-                            type: string
-                            expr: _col5
-                            type: string
-                            expr: _col6
-                            type: string
-                      outputColumnNames: _col0, _col1, _col5, _col6
-                      Filter Operator
-                        isSamplingPred: false
-                        predicate:
-                            expr: (_col6 = '2008-04-08')
-                            type: boolean
-                        Select Operator
-                          expressions:
-                                expr: _col0
-                                type: int
-                                expr: _col1
-                                type: string
-                                expr: _col5
-                                type: string
-                          outputColumnNames: _col0, _col1, _col2
-                          File Output Operator
-                            compressed: false
-                            GlobalTableId: 1
-                            directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002
-                            NumFilesPerFileSink: 1
-                            Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10000/
-                            table:
-                                input format: org.apache.hadoop.mapred.TextInputFormat
-                                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                properties:
-                                  bucket_count -1
-                                  columns key,value1,value2
-                                  columns.types string:string:string
-                                  file.inputformat org.apache.hadoop.mapred.TextInputFormat
-                                  file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                                  location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
-                                  name bucketmapjoin_tmp_result
-                                  serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
-                                  serialization.format 1
-                                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                                  transient_lastDdlTime 1284665725
-                                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                                name: bucketmapjoin_tmp_result
-                            TotalFiles: 1
-                            GatherStats: true
-                            MultiFileSpray: false
-          Bucket Mapjoin Context:
-              Alias Bucket Base File Name Mapping:
-                  b {srcbucket20.txt=[srcbucket20.txt, srcbucket22.txt], srcbucket21.txt=[srcbucket21.txt, srcbucket23.txt]}
-              Alias Bucket File Name Mapping:
-                  b {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt, pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt, pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt]}
-              Alias Bucket Output File Name Mapping:
-                  pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt 0
-                  pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt 1
       Needs Tagging: false
       Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin [a]
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin [a]
       Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin 
+        pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin 
           Partition
             base file name: srcbucket_mapjoin
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -265,12 +218,12 @@
               columns.types int:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
               file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin
              name srcbucket_mapjoin
              serialization.ddl struct srcbucket_mapjoin { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284665721
+              transient_lastDdlTime 1287181928
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             input format: org.apache.hadoop.mapred.TextInputFormat
@@ -282,12 +235,12 @@
               columns.types int:string
               file.inputformat org.apache.hadoop.mapred.TextInputFormat
              file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-              location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin
+              location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin
              name srcbucket_mapjoin
              serialization.ddl struct srcbucket_mapjoin { i32 key, string value}
              serialization.format 1
              serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-              transient_lastDdlTime 1284665721
+              transient_lastDdlTime 1287181928
             serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
             name: srcbucket_mapjoin
           name: srcbucket_mapjoin
@@ -299,14 +252,14 @@
     Move Operator
       files:
          hdfs directory: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10002
-          destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10000
+          destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10000
   Stage: Stage-0
     Move Operator
       tables:
           replace: true
-          source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10000
+          source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10000
          table:
              input format: org.apache.hadoop.mapred.TextInputFormat
              output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -316,28 +269,28 @@
                columns.types string:string:string
                file.inputformat org.apache.hadoop.mapred.TextInputFormat
                file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
+                location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
                name bucketmapjoin_tmp_result
                serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                serialization.format 1
                serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                transient_lastDdlTime 1284665725
+                transient_lastDdlTime 1287181934
              serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
              name: bucketmapjoin_tmp_result
-          tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10001
+          tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10001
   Stage: Stage-2
     Stats-Aggr Operator
-      Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10000/
+      Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10000/
   Stage: Stage-3
     Map Reduce
      Alias -> Map Operator Tree:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10002 
           File Output Operator
            compressed: false
            GlobalTableId: 0
-            directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10000
+            directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10000
            NumFilesPerFileSink: 1
            table:
                input format: org.apache.hadoop.mapred.TextInputFormat
@@ -348,12 +301,12 @@
                  columns.types string:string:string
                  file.inputformat org.apache.hadoop.mapred.TextInputFormat
                  file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
-                  location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
+                  location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result
                  name bucketmapjoin_tmp_result
                  serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2}
                  serialization.format 1
                  serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-                  transient_lastDdlTime 1284665725
+                  transient_lastDdlTime 1287181934
                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
                name: bucketmapjoin_tmp_result
            TotalFiles: 1
@@ -361,9 +314,9 @@
            MultiFileSpray: false
      Needs Tagging: false
      Path -> Alias:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002]
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10002]
      Path -> Partition:
-        pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-35-25_811_5921577067687191235/-ext-10002 
+        pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-32-14_890_7132224476182536886/-ext-10002 
          Partition
            base file name: -ext-10002
            input format: org.apache.hadoop.mapred.TextInputFormat
@@ -374,12 +327,12 @@
columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284665725 + transient_lastDdlTime 1287181934 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -390,12 +343,12 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284665725 + transient_lastDdlTime 1287181934 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -423,11 +376,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-35-39_042_2292053960549391675/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-32-27_821_1716622073264168482/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-35-39_042_2292053960549391675/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-32-27_821_1716622073264168482/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_tmp_result.key SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:key, type:int, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value1 SIMPLE [(srcbucket_mapjoin)a.FieldSchema(name:value, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_tmp_result.value2 SIMPLE [(srcbucket_mapjoin_part)b.FieldSchema(name:value, type:string, comment:null), ] @@ -476,11 +429,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-01_326_1394559857977869870/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-32-48_695_8542588372219700867/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-01_326_1394559857977869870/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-32-48_695_8542588372219700867/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: 
bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -519,14 +472,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-11_686_145028380865003759/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-32-57_913_2821095706036592785/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-11_686_145028380865003759/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-32-57_913_2821095706036592785/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -568,7 +521,8 @@ (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF srcbucket_mapjoin a) (TOK_TABREF srcbucket_mapjoin_part b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_TAB bucketmapjoin_tmp_result)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST a))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) value))) (TOK_WHERE (= (. 
(TOK_TABLE_OR_COL b) ds) "2008-04-08")))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-5 depends on stages: Stage-1 , consists of Stage-4, Stage-3 Stage-4 Stage-0 depends on stages: Stage-4, Stage-3 @@ -576,6 +530,40 @@ Stage-3 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + a + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + a + TableScan + alias: a + GatherStats: false + Common Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {key} {value} + 1 {value} {ds} + handleSkewJoin: false + keys: + 0 [Column[key]] + 1 [Column[key]] + outputColumnNames: _col0, _col1, _col5, _col6 + Position of Big Table: 1 + Bucket Mapjoin Context: + Alias Bucket Base File Name Mapping: + a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} + Alias Bucket File Name Mapping: + a {pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} + Alias Bucket Output File Name Mapping: + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -628,9 +616,9 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10002 NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -640,7 +628,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -649,7 +637,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284665761 + transient_lastDdlTime 1287181968 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -657,97 +645,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - a - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - a - TableScan - alias: a - GatherStats: false - Common Join Operator - condition map: - Inner Join 0 to 1 - condition expressions: - 0 {key} {value} - 1 {value} {ds} - handleSkewJoin: false - keys: - 0 [Column[key]] - 1 [Column[key]] - outputColumnNames: _col0, _col1, _col5, _col6 - Position of Big Table: 1 - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - expr: _col6 - type: string - outputColumnNames: _col0, _col1, _col5, _col6 - Filter Operator - isSamplingPred: false - predicate: - expr: (_col6 = '2008-04-08') - type: boolean - Select Operator - expressions: - expr: _col0 - type: int - expr: _col1 - type: string - expr: _col5 - type: string - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002 - NumFilesPerFileSink: 1 - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10000/ - table: - input format: org.apache.hadoop.mapred.TextInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - properties: - bucket_count -1 - columns key,value1,value2 - columns.types string:string:string - file.inputformat org.apache.hadoop.mapred.TextInputFormat - file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result - name bucketmapjoin_tmp_result - numFiles 1 - numPartitions 0 - numRows 464 - serialization.ddl struct bucketmapjoin_tmp_result { string key, string value1, string value2} - serialization.format 1 - serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - totalSize 8983 - transient_lastDdlTime 1284665761 - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - name: bucketmapjoin_tmp_result - TotalFiles: 1 - GatherStats: true - MultiFileSpray: false - Bucket Mapjoin Context: - Alias Bucket Base File Name Mapping: - a {srcbucket20.txt=[srcbucket20.txt], srcbucket21.txt=[srcbucket21.txt], srcbucket22.txt=[srcbucket20.txt], srcbucket23.txt=[srcbucket21.txt]} - Alias Bucket File Name Mapping: - a {pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket20.txt], pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt=[pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin/srcbucket21.txt]} - Alias Bucket Output File Name Mapping: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket20.txt 0 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket21.txt 1 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket22.txt 2 - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08/srcbucket23.txt 3 Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 [b] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part/ds=2008-04-08 Partition base file name: ds=2008-04-08 input format: org.apache.hadoop.mapred.TextInputFormat @@ -761,13 +663,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284665721 + transient_lastDdlTime 1287181928 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -779,13 +681,13 @@ columns.types int:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/srcbucket_mapjoin_part + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/srcbucket_mapjoin_part name srcbucket_mapjoin_part partition_columns ds serialization.ddl struct srcbucket_mapjoin_part { i32 key, string value} serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - transient_lastDdlTime 1284665721 + transient_lastDdlTime 1287181928 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: srcbucket_mapjoin_part name: srcbucket_mapjoin_part @@ -797,14 +699,14 @@ Move Operator files: hdfs directory: true - source: 
pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002 - destination: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10002 + destination: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10000 Stage: Stage-0 Move Operator tables: replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -814,7 +716,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -823,23 +725,23 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284665761 + transient_lastDdlTime 1287181968 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10001 Stage: Stage-2 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10000/ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10002 File Output Operator compressed: false GlobalTableId: 0 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10000 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.TextInputFormat @@ -850,7 +752,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -859,7 +761,7 @@ serialization.format 1 
serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284665761 + transient_lastDdlTime 1287181968 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result TotalFiles: 1 @@ -867,9 +769,9 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002 [pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10002 [pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10002] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-16_12-36-16_572_898242702780671781/-ext-10002 + pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_15-33-01_834_6703337065896490804/-ext-10002 Partition base file name: -ext-10002 input format: org.apache.hadoop.mapred.TextInputFormat @@ -880,7 +782,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -889,7 +791,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284665761 + transient_lastDdlTime 1287181968 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -900,7 +802,7 @@ columns.types string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/bucketmapjoin_tmp_result + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/bucketmapjoin_tmp_result name bucketmapjoin_tmp_result numFiles 1 numPartitions 0 @@ -909,7 +811,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 8983 - transient_lastDdlTime 1284665761 + transient_lastDdlTime 1287181968 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: bucketmapjoin_tmp_result name: bucketmapjoin_tmp_result @@ -949,11 +851,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-29_599_6490705534837936982/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-33-15_502_1421160556625509468/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-29_599_6490705534837936982/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-33-15_502_1421160556625509468/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, 
comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value2 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value2, type:string, comment:null), ] @@ -1038,11 +940,11 @@ PREHOOK: query: select count(1) from bucketmapjoin_tmp_result PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_tmp_result -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-53_220_7463746957700915088/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-33-36_617_5470095902610700853/-mr-10000 POSTHOOK: query: select count(1) from bucketmapjoin_tmp_result POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_tmp_result -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-36-53_220_7463746957700915088/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-33-36_617_5470095902610700853/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] @@ -1105,14 +1007,14 @@ PREHOOK: type: QUERY PREHOOK: Input: default@bucketmapjoin_hash_result_1 PREHOOK: Input: default@bucketmapjoin_hash_result_2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-37-03_681_8960608162191818904/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-33-45_895_3152224560181417883/-mr-10000 POSTHOOK: query: select a.key-b.key, a.value1-b.value1, a.value2-b.value2 from bucketmapjoin_hash_result_1 a left outer join bucketmapjoin_hash_result_2 b on a.key = b.key POSTHOOK: type: QUERY POSTHOOK: Input: default@bucketmapjoin_hash_result_1 POSTHOOK: Input: default@bucketmapjoin_hash_result_2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-16_12-37-03_681_8960608162191818904/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_15-33-45_895_3152224560181417883/-mr-10000 POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.key EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:key, type:string, comment:null), ] POSTHOOK: Lineage: bucketmapjoin_hash_result_1.value1 EXPRESSION [(bucketmapjoin_tmp_result)bucketmapjoin_tmp_result.FieldSchema(name:value1, type:string, comment:null), ] Index: ql/src/test/results/clientpositive/union22.q.out =================================================================== --- ql/src/test/results/clientpositive/union22.q.out (revision 1023076) +++ ql/src/test/results/clientpositive/union22.q.out (working copy) @@ -82,12 +82,59 @@ (TOK_QUERY (TOK_FROM (TOK_SUBQUERY (TOK_UNION (TOK_QUERY (TOK_FROM (TOK_TABREF dst_union22_delta)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (TOK_TABLE_OR_COL k1) k1) (TOK_SELEXPR (TOK_TABLE_OR_COL k2) k2) (TOK_SELEXPR (TOK_TABLE_OR_COL k3) k3) (TOK_SELEXPR (TOK_TABLE_OR_COL k4) k4)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (<= (TOK_TABLE_OR_COL k0) 
50))))) (TOK_QUERY (TOK_FROM (TOK_LEFTOUTERJOIN (TOK_TABREF dst_union22 a) (TOK_SUBQUERY (TOK_QUERY (TOK_FROM (TOK_TABREF dst_union22_delta)) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (and (= (TOK_TABLE_OR_COL ds) '1') (> (TOK_TABLE_OR_COL k0) 50))))) b) (and (= (. (TOK_TABLE_OR_COL a) k1) (. (TOK_TABLE_OR_COL b) k1)) (= (. (TOK_TABLE_OR_COL a) ds) '1')))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_HINTLIST (TOK_HINT TOK_MAPJOIN (TOK_HINTARGLIST b))) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k1) k1) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) k2) k2) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k3) k3) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) k4) k4)) (TOK_WHERE (> (. (TOK_TABLE_OR_COL a) k1) 20))))) subq)) (TOK_INSERT (TOK_DESTINATION (TOK_TAB dst_union22 (TOK_PARTSPEC (TOK_PARTVAL ds '2')))) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)))) STAGE DEPENDENCIES: - Stage-1 is a root stage + Stage-7 is a root stage + Stage-1 depends on stages: Stage-7 Stage-3 depends on stages: Stage-1 Stage-0 depends on stages: Stage-3 Stage-4 depends on stages: Stage-0 STAGE PLANS: + Stage: Stage-7 + Map Reduce Local Work + Alias -> Map Local Tables: + null-subquery2:subq-subquery2:b:dst_union22_delta + Fetch Operator + limit: -1 + Alias -> Map Local Operator Tree: + null-subquery2:subq-subquery2:b:dst_union22_delta + TableScan + alias: dst_union22_delta + GatherStats: false + Filter Operator + isSamplingPred: false + predicate: + expr: ((ds = '1') and (k0 > 50)) + type: boolean + Filter Operator + isSamplingPred: false + predicate: + expr: ((ds = '1') and (k0 > 50)) + type: boolean + Select Operator + expressions: + expr: k1 + type: string + expr: k3 + type: string + expr: k4 + type: string + outputColumnNames: _col1, _col3, _col4 + Common Join Operator + condition map: + Left Outer Join0 to 1 + condition expressions: + 0 {k1} {k2} + 1 {_col3} {_col4} + filter predicates: + 0 {(ds = '1')} + 1 + handleSkewJoin: false + keys: + 0 [Column[k1]] + 1 [Column[_col1]] + outputColumnNames: _col0, _col1, _col10, _col11 + Position of Big Table: 0 + Stage: Stage-1 Map Reduce Alias -> Map Operator Tree: @@ -118,7 +165,7 @@ File Output Operator compressed: false GlobalTableId: 0 - directory: file:/tmp/nzhang/hive_2010-09-21_14-46-27_944_8496687190593309577/-mr-10002 + directory: file:/tmp/liyintang/hive_2010-10-15_16-07-19_952_3322818947448684803/-mr-10002 NumFilesPerFileSink: 1 table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -132,69 +179,11 @@ MultiFileSpray: false Local Work: Map Reduce Local Work - Alias -> Map Local Tables: - null-subquery2:subq-subquery2:b:dst_union22_delta - Fetch Operator - limit: -1 - Alias -> Map Local Operator Tree: - null-subquery2:subq-subquery2:b:dst_union22_delta - TableScan - alias: dst_union22_delta - GatherStats: false - Filter Operator - isSamplingPred: false - predicate: - expr: ((ds = '1') and (k0 > 50)) - type: boolean - Filter Operator - isSamplingPred: false - predicate: - expr: ((ds = '1') and (k0 > 50)) - type: boolean - Select Operator - expressions: - expr: k1 - type: string - expr: k3 - type: string - expr: k4 - type: string - outputColumnNames: _col1, _col3, _col4 - Common Join Operator - condition map: - Left Outer Join0 to 1 - condition expressions: - 0 {k1} {k2} - 1 {_col3} {_col4} - filter predicates: - 0 {(ds = '1')} - 1 - handleSkewJoin: false - keys: - 0 [Column[k1]] - 1 [Column[_col1]] - outputColumnNames: _col0, _col1, _col10, _col11 - Position of Big Table: 0 - File Output Operator 
- compressed: false - GlobalTableId: 0 - directory: file:/tmp/nzhang/hive_2010-09-21_14-46-27_944_8496687190593309577/-mr-10002 - NumFilesPerFileSink: 1 - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - properties: - columns _col0,_col1,_col10,_col11 - columns.types string,string,string,string - escape.delim \ - TotalFiles: 1 - GatherStats: false - MultiFileSpray: false Needs Tagging: false Path -> Alias: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22/ds=1 [null-subquery2:subq-subquery2:a] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22/ds=1 [null-subquery2:subq-subquery2:a] Path -> Partition: - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22/ds=1 + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22/ds=1 Partition base file name: ds=1 input format: org.apache.hadoop.mapred.TextInputFormat @@ -207,7 +196,7 @@ columns.types string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22 name dst_union22 numFiles 1 numPartitions 1 @@ -217,7 +206,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11624 - transient_lastDdlTime 1285105581 + transient_lastDdlTime 1287184033 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -228,7 +217,7 @@ columns.types string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22 name dst_union22 numFiles 1 numPartitions 1 @@ -238,7 +227,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11624 - transient_lastDdlTime 1285105581 + transient_lastDdlTime 1287184033 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dst_union22 name: dst_union22 @@ -246,7 +235,7 @@ Stage: Stage-3 Map Reduce Alias -> Map Operator Tree: - file:/tmp/nzhang/hive_2010-09-21_14-46-27_944_8496687190593309577/-mr-10002 + file:/tmp/liyintang/hive_2010-10-15_16-07-19_952_3322818947448684803/-mr-10002 Select Operator expressions: expr: _col0 @@ -289,10 +278,10 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10000 NumFilesPerFileSink: 1 Static Partition Specification: ds=2/ - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat 
output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -302,7 +291,7 @@ columns.types string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22 name dst_union22 numFiles 1 numPartitions 1 @@ -312,7 +301,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11624 - transient_lastDdlTime 1285105581 + transient_lastDdlTime 1287184033 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dst_union22 TotalFiles: 1 @@ -358,10 +347,10 @@ File Output Operator compressed: false GlobalTableId: 1 - directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10000 + directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10000 NumFilesPerFileSink: 1 Static Partition Specification: ds=2/ - Stats Publishing Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10000/ + Stats Publishing Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10000/ table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -371,7 +360,7 @@ columns.types string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22 name dst_union22 numFiles 1 numPartitions 1 @@ -381,7 +370,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11624 - transient_lastDdlTime 1285105581 + transient_lastDdlTime 1287184033 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dst_union22 TotalFiles: 1 @@ -389,10 +378,10 @@ MultiFileSpray: false Needs Tagging: false Path -> Alias: - file:/tmp/nzhang/hive_2010-09-21_14-46-27_944_8496687190593309577/-mr-10002 [file:/tmp/nzhang/hive_2010-09-21_14-46-27_944_8496687190593309577/-mr-10002] - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22_delta/ds=1 [null-subquery1:subq-subquery1:dst_union22_delta] + file:/tmp/liyintang/hive_2010-10-15_16-07-19_952_3322818947448684803/-mr-10002 [file:/tmp/liyintang/hive_2010-10-15_16-07-19_952_3322818947448684803/-mr-10002] + pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22_delta/ds=1 [null-subquery1:subq-subquery1:dst_union22_delta] Path -> Partition: - file:/tmp/nzhang/hive_2010-09-21_14-46-27_944_8496687190593309577/-mr-10002 + file:/tmp/liyintang/hive_2010-10-15_16-07-19_952_3322818947448684803/-mr-10002 Partition base file name: -mr-10002 input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -408,7 +397,7 @@ columns _col0,_col1,_col10,_col11 columns.types string,string,string,string escape.delim \ - pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22_delta/ds=1 + 
pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22_delta/ds=1 Partition base file name: ds=1 input format: org.apache.hadoop.mapred.TextInputFormat @@ -421,7 +410,7 @@ columns.types string:string:string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22_delta + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22_delta name dst_union22_delta numFiles 1 numPartitions 1 @@ -431,7 +420,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17436 - transient_lastDdlTime 1285105587 + transient_lastDdlTime 1287184039 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe input format: org.apache.hadoop.mapred.TextInputFormat @@ -442,7 +431,7 @@ columns.types string:string:string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22_delta + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22_delta name dst_union22_delta numFiles 1 numPartitions 1 @@ -452,7 +441,7 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 17436 - transient_lastDdlTime 1285105587 + transient_lastDdlTime 1287184039 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dst_union22_delta name: dst_union22_delta @@ -463,7 +452,7 @@ partition: ds 2 replace: true - source: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10000 + source: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10000 table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -473,7 +462,7 @@ columns.types string:string:string:string file.inputformat org.apache.hadoop.mapred.TextInputFormat file.outputformat org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat - location pfile:/data/users/nzhang/work/784/apache-hive/build/ql/test/data/warehouse/dst_union22 + location pfile:/data/users/liyintang/trunk_os/build/ql/test/data/warehouse/dst_union22 name dst_union22 numFiles 1 numPartitions 1 @@ -483,14 +472,14 @@ serialization.format 1 serialization.lib org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe totalSize 11624 - transient_lastDdlTime 1285105581 + transient_lastDdlTime 1287184033 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe name: dst_union22 - tmp directory: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10001 + tmp directory: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10001 Stage: Stage-4 Stats-Aggr Operator - Stats Aggregation Key Prefix: pfile:/data/users/nzhang/work/784/apache-hive/build/ql/scratchdir/hive_2010-09-21_14-46-27_944_8496687190593309577/-ext-10000/ + Stats Aggregation Key Prefix: pfile:/data/users/liyintang/trunk_os/build/ql/scratchdir/hive_2010-10-15_16-07-19_952_3322818947448684803/-ext-10000/ PREHOOK: query: insert overwrite table dst_union22 partition (ds='2') 
@@ -540,11 +529,11 @@ PREHOOK: query: select * from dst_union22 where ds = '2' order by k1 PREHOOK: type: QUERY PREHOOK: Input: default@dst_union22@ds=2 -PREHOOK: Output: file:/tmp/nzhang/hive_2010-09-21_14-46-38_439_7599989320643509508/-mr-10000 +PREHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_16-07-32_654_4484631386729574257/-mr-10000 POSTHOOK: query: select * from dst_union22 where ds = '2' order by k1 POSTHOOK: type: QUERY POSTHOOK: Input: default@dst_union22@ds=2 -POSTHOOK: Output: file:/tmp/nzhang/hive_2010-09-21_14-46-38_439_7599989320643509508/-mr-10000 +POSTHOOK: Output: file:/tmp/liyintang/hive_2010-10-15_16-07-32_654_4484631386729574257/-mr-10000 POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k1 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: dst_union22 PARTITION(ds=1).k3 SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]