diff --git ql/pom.xml ql/pom.xml
index 90dfb2c..fa79601 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -351,6 +351,7 @@
 org.apache.hive:hive-common
 org.apache.hive:hive-exec
 org.apache.hive:hive-serde
+org.apache.hive:hive-metastore
 com.esotericsoftware.kryo:kryo
 org.apache.thrift:libthrift
 commons-lang:commons-lang
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
index 99b062f..65c4a8f 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsTask.java
@@ -71,7 +71,6 @@ public ColumnStatsTask() {
   @Override
   public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) {
     super.initialize(conf, queryPlan, ctx);
-    work.initializeForFetch();
     try {
       JobConf job = new JobConf(conf);
       ftOp = new FetchOperator(work.getfWork(), job);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
index ffc4c42..f88d810 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchOperator.java
@@ -34,10 +34,8 @@
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.common.FileUtils;
-import org.apache.hadoop.hive.common.ObjectPair;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.exec.mr.ExecMapperContext;
-import org.apache.hadoop.hive.ql.exec.FooterBuffer;
 import org.apache.hadoop.hive.ql.io.HiveContextAwareRecordReader;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.io.HiveRecordReader;
@@ -48,7 +46,6 @@
 import org.apache.hadoop.hive.ql.plan.PartitionDesc;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
-import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.Deserializer;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.objectinspector.DelegatedObjectInspectorFactory;
@@ -116,21 +113,25 @@ public FetchOperator() {
   }
 
   public FetchOperator(FetchWork work, JobConf job) {
-    this.job = job;
-    this.work = work;
-    initialize();
+    this(work, job, null, null);
   }
 
   public FetchOperator(FetchWork work, JobConf job, Operator operator,
-      List vcCols) {
+      ExecMapperContext context) {
     this.job = job;
     this.work = work;
-    this.operator = operator;
-    this.vcCols = vcCols;
-    initialize();
+    initialize(operator, context);
   }
 
-  private void initialize() {
+  private List getVirtualColumns(Operator ts) {
+    if (ts instanceof TableScanOperator && ts.getConf() != null) {
+      return ((TableScanOperator)ts).getConf().getVirtualCols();
+    }
+    return null;
+  }
+
+  public void initialize(Operator operator, ExecMapperContext context) {
+    this.vcCols = getVirtualColumns(operator);
     if (hasVC = vcCols != null && !vcCols.isEmpty()) {
       List names = new ArrayList(vcCols.size());
       List inspectors = new ArrayList(vcCols.size());
@@ -156,15 +157,14 @@ private void initialize() {
     } else {
       isNativeTable = true;
     }
-    setupExecContext();
-  }
-
-  private void setupExecContext() {
-    if (hasVC || work.getSplitSample() != null) {
+    if (context == null && (hasVC || work.getSplitSample() != null)) {
       context = new ExecMapperContext();
-      if (operator != null) {
-        operator.setExecContext(context);
-      }
+    }
+    this.operator = operator;
+    this.context = context;
+
+    if (operator != null && context != null) {
+      operator.setExecContext(context);
     }
   }
 
@@ -501,27 +501,28 @@ public boolean doNext(WritableComparable key, Writable value) throws IOException
   /**
    * Get the next row and push down it to operator tree.
-   * Currently only used by FetchTask.
+   * Currently only used by FetchTask and ExecDriver (for partition sampling).
   **/
   public boolean pushRow() throws IOException, HiveException {
-    if(work.getRowsComputedUsingStats() != null) {
-      for (List row : work.getRowsComputedUsingStats()) {
-        operator.processOp(row, 0);
-      }
-      operator.flush();
-      return true;
+    if (work.getRowsComputedUsingStats() == null) {
+      return pushRow(getNextRow());
     }
-    InspectableObject row = getNextRow();
-    if (row != null) {
-      pushRow(row);
-    } else {
-      operator.flush();
+    for (List row : work.getRowsComputedUsingStats()) {
+      operator.processOp(row, 0);
     }
-    return row != null;
+    operator.flush();
+    return false;
   }
 
-  protected void pushRow(InspectableObject row) throws HiveException {
-    operator.processOp(row.o, 0);
+  protected boolean pushRow(InspectableObject row) throws HiveException {
+    if (row != null) {
+      operator.processOp(row.o, 0);
+      if (!operator.getDone()) {
+        return true;
+      }
+    }
+    operator.flush();
+    return false;
   }
 
   private transient final InspectableObject inspectable = new InspectableObject();
@@ -629,11 +630,9 @@ public void clearFetchContext() throws HiveException {
       }
       if (operator != null) {
         operator.close(false);
-        operator = null;
       }
       if (context != null) {
-        context.clear();
-        context = null;
+        context.reset();
       }
       this.currTbl = null;
       this.currPath = null;
@@ -645,17 +644,31 @@
     }
   }
 
+  public void close() throws HiveException {
+    clearFetchContext();
+    if (context != null) {
+      context.clear();
+      context = null;
+    }
+    operator = null;
+  }
+
   /**
    * used for bucket map join
    */
-  public void setupContext(List paths) {
+  public void setupContext(String currentInput, List paths) {
     this.iterPath = paths.iterator();
     if (work.isNotPartitioned()) {
       this.currTbl = work.getTblDesc();
     } else {
       this.iterPartDesc = work.getPartDescs(paths).iterator();
    }
-    setupExecContext();
+    if (context != null) {
+      context.setCurrentBigBucketFile(currentInput);
+    }
+    if (operator != null) {
+      operator.reset();
+    }
   }
 
   /**
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
index 770d904..e2ee452 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FetchTask.java
@@ -30,7 +30,6 @@
 import org.apache.hadoop.hive.ql.QueryPlan;
 import org.apache.hadoop.hive.ql.io.HiveInputFormat;
 import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.VirtualColumn;
 import org.apache.hadoop.hive.ql.plan.FetchWork;
 import org.apache.hadoop.hive.ql.plan.TableDesc;
 import org.apache.hadoop.hive.ql.plan.api.StageType;
@@ -75,7 +74,7 @@ public void initialize(HiveConf conf, QueryPlan queryPlan, DriverContext ctx) {
         HiveInputFormat.pushFilters(job, ts);
       }
       sink = work.getSink();
-      fetch = new FetchOperator(work, job, source, getVirtualColumns(source));
+      fetch = new FetchOperator(work, job, source, null);
       source.initialize(conf, new ObjectInspector[]{fetch.getOutputObjectInspector()});
       totalRows = 0;
 
@@ -87,13 +86,6 @@
     }
   }
 
-  private List getVirtualColumns(Operator ts) {
-    if (ts instanceof TableScanOperator && ts.getConf() != null) {
-      return ((TableScanOperator)ts).getConf().getVirtualCols();
-    }
-    return null;
-  }
-
   @Override
   public int execute(DriverContext driverContext) {
     assert false;
@@ -129,7 +121,7 @@ public boolean fetch(List res) throws IOException, CommandNeedRetryException {
     }
     try {
       if (rowsRet <= 0) {
-        fetch.clearFetchContext();
+        fetch.close();
         return false;
       }
       boolean fetched = false;
@@ -171,7 +163,7 @@ public String getName() {
   */
  public void clearFetch() throws HiveException {
    if (fetch != null) {
-      fetch.clearFetchContext();
+      fetch.close();
    }
  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
index aa8f19c..4f534fd 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/HashTableSinkOperator.java
@@ -291,7 +291,8 @@ public void closeOp(boolean abort) throws HiveException {
           out.close();
         }
         tableContainer.clear();
-        console.printInfo(Utilities.now() + "\tUpload 1 File to: " + tmpURIPath);
+        console.printInfo(Utilities.now() + "\tUpload 1 File to: " + tmpURIPath +
+            " [" + fs.getFileStatus(path).getLen() + " bytes]");
       }
     }
     super.closeOp(abort);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java
index 166461a..fc6ad25 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/PartitionKeySampler.java
@@ -149,23 +149,15 @@ public void setSamplePercent(float samplePercent) {
     }
 
     @Override
-    public boolean pushRow() throws IOException, HiveException {
-      if (!super.pushRow()) {
-        return false;
-      }
-      if (sampled < sampleNum) {
+    protected boolean pushRow(InspectableObject row) throws HiveException {
+      if (row != null && sampled < sampleNum) {
+        if (random.nextFloat() < samplePercent) {
+          sampled++;
+          return super.pushRow(row);
+        }
         return true;
       }
-      operator.flush();
       return false;
     }
-
-    @Override
-    protected void pushRow(InspectableObject row) throws HiveException {
-      if (random.nextFloat() < samplePercent) {
-        sampled++;
-        super.pushRow(row);
-      }
-    }
   }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
index 81a1232..0c511c8 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/SMBMapJoinOperator.java
@@ -201,7 +201,6 @@ public void initializeMapredLocalWork(MapJoinDesc mjConf, Configuration hconf,
       FetchOperator fetchOp = new FetchOperator(fetchWork, jobClone);
       ts.initialize(jobClone, new ObjectInspector[]{fetchOp.getOutputObjectInspector()});
-      fetchOp.clearFetchContext();
 
       DummyStoreOperator sinkOp = aliasToSinkWork.get(alias);
 
@@ -521,16 +520,17 @@ private void setUpFetchContexts(String alias, MergeQueue mergeQueue) throws Hive
     Class bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass();
     BucketMatcher bucketMatcher = ReflectionUtils.newInstance(bucketMatcherCls, null);
 
-    getExecContext().setFileId(bucketMatcherCxt.createFileId(currentInputPath.toString()));
+    String inputPathString = currentInputPath.toString();
+    getExecContext().setFileId(bucketMatcherCxt.createFileId(inputPathString));
     LOG.info("set task id: " + getExecContext().getFileId());
 
     bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt
         .getAliasBucketFileNameMapping());
 
-    List aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputPath.toString(),
+    List aliasFiles = bucketMatcher.getAliasBucketFiles(inputPathString,
         bucketMatcherCxt.getMapJoinBigTableAlias(), alias);
 
-    mergeQueue.setupContext(aliasFiles);
+    mergeQueue.setupContext(inputPathString, aliasFiles);
   }
 
   private void fetchOneRow(byte tag) {
@@ -617,7 +617,7 @@ public void closeOp(boolean abort) throws HiveException {
       MergeQueue mergeQueue = entry.getValue();
       Operator forwardOp = localWork.getAliasToWork().get(alias);
       forwardOp.close(abort);
-      mergeQueue.clearFetchContext();
+      mergeQueue.close();
     }
   }
 
@@ -690,7 +690,7 @@ public MergeQueue(String alias, FetchWork fetchWork, JobConf jobConf,
     // currently, number of paths is always the same (bucket numbers are all the same over
     // all partitions in a table).
    // But if hive supports assigning bucket number for each partition, this can be vary
-    public void setupContext(List paths) throws HiveException {
+    public void setupContext(String inputPathString, List paths) throws HiveException {
      int segmentLen = paths.size();
      FetchOperator[] segments = segmentsForSize(segmentLen);
      for (int i = 0 ; i < segmentLen; i++) {
@@ -698,7 +698,7 @@ public void setupContext(List paths) throws HiveException {
        if (segments[i] == null) {
          segments[i] = new FetchOperator(fetchWork, new JobConf(jobConf));
        }
-        segments[i].setupContext(Arrays.asList(path));
+        segments[i].setupContext(inputPathString, Arrays.asList(path));
      }
      initialize(segmentLen);
      for (int i = 0; i < segmentLen; i++) {
@@ -810,6 +810,17 @@ private boolean next(Integer current) throws IOException, HiveException {
       keys[current] = null;
       return false;
     }
+
+    public void close() throws HiveException {
+      if (segments != null) {
+        for (FetchOperator op : segments) {
+          if (op != null) {
+            op.close();
+          }
+        }
+      }
+      segments = null;
+    }
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapperContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapperContext.java
index 74bc2d2..094ce9d 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapperContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/ExecMapperContext.java
@@ -17,11 +17,8 @@
  */
 package org.apache.hadoop.hive.ql.exec.mr;
 
-import java.util.Map;
-
 import org.apache.commons.logging.Log;
 import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.hive.ql.exec.FetchOperator;
 import org.apache.hadoop.hive.ql.io.IOContext;
 import org.apache.hadoop.hive.ql.plan.MapredLocalWork;
 import org.apache.hadoop.mapred.JobConf;
@@ -45,7 +42,6 @@
   // if big alias is partitioned table, it's partition spec + bucket number
   private String fileId = null;
   private MapredLocalWork localWork = null;
-  private Map fetchOperators;
   private JobConf jc;
 
   private IOContext ioCxt;
@@ -64,6 +60,14 @@ public ExecMapperContext() {
     ioCxt = IOContext.get();
   }
 
+  public void reset() {
+    lastInputPath = null;
+    currentInputPath = null;
+    inputFileChecked = false;
+    currentBigBucketFile = null;
+    fileId = null;
+  }
+
   public void clear() {
     IOContext.clear();
     ioCxt = null;
@@ -138,14 +142,6 @@ public void setFileId(String fileId) {
     this.fileId = fileId;
   }
 
-  public Map getFetchOperators() {
-    return fetchOperators;
-  }
-
-  public void setFetchOperators(Map fetchOperators) {
-    this.fetchOperators = fetchOperators;
-  }
-
   public IOContext getIoCxt() {
     return ioCxt;
   }
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
index 0cc90d0..9880a42 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/mr/MapredLocalTask.java
@@ -64,7 +64,6 @@
 import org.apache.hadoop.hive.ql.session.SessionState;
 import org.apache.hadoop.hive.ql.session.SessionState.LogHelper;
 import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
-import org.apache.hadoop.hive.serde2.objectinspector.InspectableObject;
 import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
 import org.apache.hadoop.hive.shims.HadoopShims;
 import org.apache.hadoop.hive.shims.ShimLoader;
@@ -91,10 +90,6 @@
   public static MemoryMXBean memoryMXBean;
   private static final Log LOG = LogFactory.getLog(MapredLocalTask.class);
 
-  // not sure we need this exec context; but all the operators in the work
-  // will pass this context throught
-  private final ExecMapperContext execContext = new ExecMapperContext();
-
   private Process executor;
 
   public MapredLocalTask() {
@@ -292,30 +287,29 @@ public int executeFromChildJVM(DriverContext driverContext) {
         + "\tStarting to launch local task to process map join;\tmaximum memory = "
         + memoryMXBean.getHeapMemoryUsage().getMax());
     fetchOperators = new HashMap();
-    Map fetchOpJobConfMap = new HashMap();
-    execContext.setJc(job);
-    // set the local work, so all the operator can get this context
-    execContext.setLocalWork(work);
+
     boolean inputFileChangeSenstive = work.getInputFileChangeSensitive();
     try {
-
-      initializeOperators(fetchOpJobConfMap);
+      initializeOperators();
       // for each big table's bucket, call the start forward
       if (inputFileChangeSenstive) {
         for (Map> bigTableBucketFiles : work
             .getBucketMapjoinContext().getAliasBucketFileNameMapping().values()) {
           for (String bigTableBucket : bigTableBucketFiles.keySet()) {
-            startForward(inputFileChangeSenstive, bigTableBucket);
+            startForward(bigTableBucket);
           }
         }
       } else {
-        startForward(inputFileChangeSenstive, null);
+        startForward(null);
       }
+      closeAllFetchers();
+
       long currentTime = System.currentTimeMillis();
       long elapsed = currentTime - startTime;
       console.printInfo(Utilities.now() + "\tEnd of local task; Time Taken: "
           + Utilities.showTime(elapsed) + " sec.");
     } catch (Throwable throwable) {
+      console.printError(org.apache.hadoop.util.StringUtils.stringifyException(throwable));
       if (throwable instanceof OutOfMemoryError
           || (throwable instanceof MapJoinMemoryExhaustionException)) {
         l4j.error("Hive Runtime Error: Map local work exhausted memory", throwable);
@@ -328,14 +322,11 @@ public int executeFromChildJVM(DriverContext driverContext) {
     return 0;
   }
 
-  private void startForward(boolean inputFileChangeSenstive, String bigTableBucket)
-      throws Exception {
+  private void startForward(String bigTableBucket) throws Exception {
     for (Map.Entry entry : fetchOperators.entrySet()) {
       String alias = entry.getKey();
       FetchOperator fetchOp = entry.getValue();
-
-      if (inputFileChangeSenstive) {
-        fetchOp.clearFetchContext();
+      if (bigTableBucket != null) {
         setUpFetchOpContext(fetchOp, alias, bigTableBucket);
       }
 
@@ -345,37 +336,32 @@ private void startForward(boolean inputFileChangeSenstive, String bigTableBucket
         continue;
       }
 
-      // get the root operator
-      Operator forwardOp = work.getAliasToWork().get(alias);
-      // walk through the operator tree
-      while (true) {
-        InspectableObject row = fetchOp.getNextRow();
-        if (row == null) {
-          if (inputFileChangeSenstive) {
-            execContext.setCurrentBigBucketFile(bigTableBucket);
-            forwardOp.reset();
-          }
-          forwardOp.close(false);
-          break;
-        }
-        forwardOp.processOp(row.o, 0);
-        // check if any operator had a fatal error or early exit during
-        // execution
-        if (forwardOp.getDone()) {
-          // ExecMapper.setDone(true);
-          break;
-        }
+      try {
+        while (fetchOp.pushRow());
+      } finally {
+        fetchOp.clearFetchContext();
+      }
     }
   }
 
-  private void initializeOperators(Map fetchOpJobConfMap)
-      throws HiveException {
+  private void closeAllFetchers() throws Exception {
+    for (FetchOperator fetchOp : fetchOperators.values()) {
+      fetchOp.close();
+    }
+  }
+
+  private void initializeOperators() throws HiveException {
+    ExecMapperContext execContext = new ExecMapperContext();
+    execContext.setJc(job);
+    // set the local work, so all the operator can get this context
+    execContext.setLocalWork(work);
+
    // this mapper operator is used to initialize all the operators
    for (Map.Entry entry : work.getAliasToFetchWork().entrySet()) {
      JobConf jobClone = new JobConf(job);
 
      TableScanOperator ts = (TableScanOperator)work.getAliasToWork().get(entry.getKey());
+      // push down projections
      ColumnProjectionUtils.appendReadColumns(
          jobClone, ts.getNeededColumnIDs(), ts.getNeededColumns());
@@ -383,29 +369,12 @@ private void initializeOperators(Map fetchOpJobConfMap)
       HiveInputFormat.pushFilters(jobClone, ts);
 
       // create a fetch operator
-      FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone);
-      fetchOpJobConfMap.put(fetchOp, jobClone);
-      fetchOperators.put(entry.getKey(), fetchOp);
-      l4j.info("fetchoperator for " + entry.getKey() + " created");
-    }
-    // initilize all forward operator
-    for (Map.Entry entry : fetchOperators.entrySet()) {
-      // get the forward op
-      String alias = entry.getKey();
-      Operator forwardOp = work.getAliasToWork().get(alias);
-
-      // put the exe context into all the operators
-      forwardOp.setExecContext(execContext);
-      // All the operators need to be initialized before process
-      FetchOperator fetchOp = entry.getValue();
-      JobConf jobConf = fetchOpJobConfMap.get(fetchOp);
+      FetchOperator fetchOp = new FetchOperator(entry.getValue(), jobClone, ts, execContext);
 
-      if (jobConf == null) {
-        jobConf = job;
-      }
-      // initialize the forward operator
       ObjectInspector objectInspector = fetchOp.getOutputObjectInspector();
-      forwardOp.initialize(jobConf, new ObjectInspector[] {objectInspector});
+      ts.initialize(jobClone, new ObjectInspector[] {objectInspector});
+
+      fetchOperators.put(entry.getKey(), fetchOp);
       l4j.info("fetchoperator for " + entry.getKey() + " initialized");
     }
   }
@@ -450,16 +419,15 @@ private void generateDummyHashTable(String alias, String bigBucketFileName)
   private void setUpFetchOpContext(FetchOperator fetchOp, String alias, String currentInputFile)
       throws Exception {
-    BucketMapJoinContext bucketMatcherCxt = this.work.getBucketMapjoinContext();
+    BucketMapJoinContext bucketMatcherCxt = work.getBucketMapjoinContext();
     Class bucketMatcherCls = bucketMatcherCxt.getBucketMatcherClass();
-    BucketMatcher bucketMatcher = (BucketMatcher) ReflectionUtils.newInstance(bucketMatcherCls,
-        null);
+    BucketMatcher bucketMatcher = ReflectionUtils.newInstance(bucketMatcherCls, null);
     bucketMatcher.setAliasBucketFileNameMapping(bucketMatcherCxt.getAliasBucketFileNameMapping());
     List aliasFiles = bucketMatcher.getAliasBucketFiles(currentInputFile, bucketMatcherCxt
         .getMapJoinBigTableAlias(), alias);
-    fetchOp.setupContext(aliasFiles);
+    fetchOp.setupContext(currentInputFile, aliasFiles);
   }
 
   @Override
diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java
index 3cae727..6805799 100644
--- ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java
+++ ql/src/java/org/apache/hadoop/hive/ql/plan/ColumnStatsWork.java
@@ -20,8 +20,6 @@
 
 import java.io.Serializable;
 
-import org.apache.hadoop.hive.ql.exec.ListSinkOperator;
-
 /**
  * ColumnStats Work.
  *
@@ -66,14 +64,6 @@ public void setColStats(ColumnStatsDesc colStats) {
     this.colStats = colStats;
   }
 
-  public ListSinkOperator getSink() {
-    return fWork.getSink();
-  }
-
-  public void initializeForFetch() {
-    fWork.initializeForFetch();
-  }
-
   public int getLeastNumRows() {
     return fWork.getLeastNumRows();
   }
diff --git ql/src/test/queries/clientpositive/join_vc.q ql/src/test/queries/clientpositive/join_vc.q
index 63b3da7..68149de 100644
--- ql/src/test/queries/clientpositive/join_vc.q
+++ ql/src/test/queries/clientpositive/join_vc.q
@@ -3,3 +3,11 @@ explain
 select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3;
 
 select t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value from src t1 join src t2 on t1.key = t2.key join src t3 on t2.value = t3.value order by t3.BLOCK__OFFSET__INSIDE__FILE,t3.key,t3.value limit 3;
+
+set hive.auto.convert.join=true;
+
+-- HIVE-4790 MapredLocalTask does not make virtual columns
+
+explain
+SELECT *,a.BLOCK__OFFSET__INSIDE__FILE FROM src a JOIN src b ON a.key = b.key where a.key < 50 order by a.key, a.BLOCK__OFFSET__INSIDE__FILE;
+SELECT *,a.BLOCK__OFFSET__INSIDE__FILE FROM src a JOIN src b ON a.key = b.key where a.key < 50 order by a.key, a.BLOCK__OFFSET__INSIDE__FILE;
diff --git ql/src/test/results/clientpositive/join_vc.q.out ql/src/test/results/clientpositive/join_vc.q.out
index cdbc5c1..c6b5770 100644
--- ql/src/test/results/clientpositive/join_vc.q.out
+++ ql/src/test/results/clientpositive/join_vc.q.out
@@ -170,3 +170,197 @@ POSTHOOK: Input: default@src
 0	238	val_238
 0	238	val_238
 0	238	val_238
+PREHOOK: query: -- HIVE-4790 MapredLocalTask does not make virtual columns
+
+explain
+SELECT *,a.BLOCK__OFFSET__INSIDE__FILE FROM src a JOIN src b ON a.key = b.key where a.key < 50 order by a.key, a.BLOCK__OFFSET__INSIDE__FILE
+PREHOOK: type: QUERY
+POSTHOOK: query: -- HIVE-4790 MapredLocalTask does not make virtual columns
+
+explain
+SELECT *,a.BLOCK__OFFSET__INSIDE__FILE FROM src a JOIN src b ON a.key = b.key where a.key < 50 order by a.key, a.BLOCK__OFFSET__INSIDE__FILE
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) BLOCK__OFFSET__INSIDE__FILE))) (TOK_WHERE (< (. (TOK_TABLE_OR_COL a) key) 50)) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key)) (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) BLOCK__OFFSET__INSIDE__FILE)))))
+
+STAGE DEPENDENCIES:
+  Stage-5 is a root stage
+  Stage-2 depends on stages: Stage-5
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-5
+    Map Reduce Local Work
+      Alias -> Map Local Tables:
+        a
+          Fetch Operator
+            limit: -1
+      Alias -> Map Local Operator Tree:
+        a
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: (key < 50)
+                  type: boolean
+              HashTable Sink Operator
+                condition expressions:
+                  0 {key} {value} {BLOCK__OFFSET__INSIDE__FILE}
+                  1 {key} {value}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[key]]
+                  1 [Column[key]]
+                Position of Big Table: 1
+
+  Stage: Stage-2
+    Map Reduce
+      Alias -> Map Operator Tree:
+        b
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: (key < 50)
+                  type: boolean
+              Map Join Operator
+                condition map:
+                     Inner Join 0 to 1
+                condition expressions:
+                  0 {key} {value} {BLOCK__OFFSET__INSIDE__FILE}
+                  1 {key} {value}
+                handleSkewJoin: false
+                keys:
+                  0 [Column[key]]
+                  1 [Column[key]]
+                outputColumnNames: _col0, _col1, _col2, _col4, _col5
+                Position of Big Table: 1
+                Select Operator
+                  expressions:
+                        expr: _col0
+                        type: string
+                        expr: _col1
+                        type: string
+                        expr: _col4
+                        type: string
+                        expr: _col5
+                        type: string
+                        expr: _col2
+                        type: bigint
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4
+                  Reduce Output Operator
+                    key expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col4
+                          type: bigint
+                    sort order: ++
+                    tag: -1
+                    value expressions:
+                          expr: _col0
+                          type: string
+                          expr: _col1
+                          type: string
+                          expr: _col2
+                          type: string
+                          expr: _col3
+                          type: string
+                          expr: _col4
+                          type: bigint
+      Local Work:
+        Map Reduce Local Work
+      Reduce Operator Tree:
+        Extract
+          File Output Operator
+            compressed: false
+            GlobalTableId: 0
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+PREHOOK: query: SELECT *,a.BLOCK__OFFSET__INSIDE__FILE FROM src a JOIN src b ON a.key = b.key where a.key < 50 order by a.key, a.BLOCK__OFFSET__INSIDE__FILE
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT *,a.BLOCK__OFFSET__INSIDE__FILE FROM src a JOIN src b ON a.key = b.key where a.key < 50 order by a.key, a.BLOCK__OFFSET__INSIDE__FILE
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+0	val_0	0	val_0	968
+0	val_0	0	val_0	968
+0	val_0	0	val_0	968
+0	val_0	0	val_0	2088
+0	val_0	0	val_0	2088
+0	val_0	0	val_0	2088
+0	val_0	0	val_0	2632
+0	val_0	0	val_0	2632
+0	val_0	0	val_0	2632
+10	val_10	10	val_10	2846
+11	val_11	11	val_11	3170
+12	val_12	12	val_12	1720
+12	val_12	12	val_12	1720
+12	val_12	12	val_12	4362
+12	val_12	12	val_12	4362
+15	val_15	15	val_15	386
+15	val_15	15	val_15	386
+15	val_15	15	val_15	2770
+15	val_15	15	val_15	2770
+17	val_17	17	val_17	910
+18	val_18	18	val_18	5340
+18	val_18	18	val_18	5340
+18	val_18	18	val_18	5514
+18	val_18	18	val_18	5514
+19	val_19	19	val_19	2824
+2	val_2	2	val_2	4004
+20	val_20	20	val_20	1118
+24	val_24	24	val_24	1972
+24	val_24	24	val_24	1972
+24	val_24	24	val_24	4594
+24	val_24	24	val_24	4594
+26	val_26	26	val_26	2226
+26	val_26	26	val_26	2226
+26	val_26	26	val_26	5284
+26	val_26	26	val_26	5284
+27	val_27	27	val_27	34
+28	val_28	28	val_28	5616
+30	val_30	30	val_30	3494
+33	val_33	33	val_33	3592
+34	val_34	34	val_34	3192
+35	val_35	35	val_35	1238
+35	val_35	35	val_35	1238
+35	val_35	35	val_35	1238
+35	val_35	35	val_35	3138
+35	val_35	35	val_35	3138
+35	val_35	35	val_35	3138
+35	val_35	35	val_35	4012
+35	val_35	35	val_35	4012
+35	val_35	35	val_35	4012
+37	val_37	37	val_37	328
+37	val_37	37	val_37	328
+37	val_37	37	val_37	5626
+37	val_37	37	val_37	5626
+4	val_4	4	val_4	1218
+41	val_41	41	val_41	3388
+42	val_42	42	val_42	2030
+42	val_42	42	val_42	2030
+42	val_42	42	val_42	3298
+42	val_42	42	val_42	3298
+43	val_43	43	val_43	2330
+44	val_44	44	val_44	4068
+47	val_47	47	val_47	1198
+5	val_5	5	val_5	3060
+5	val_5	5	val_5	3060
+5	val_5	5	val_5	3060
+5	val_5	5	val_5	3864
+5	val_5	5	val_5	3864
+5	val_5	5	val_5	3864
+5	val_5	5	val_5	4540
+5	val_5	5	val_5	4540
+5	val_5	5	val_5	4540
+8	val_8	8	val_8	1916
+9	val_9	9	val_9	5398