From 4aa0f99c69f34710739a02bccd3d58d09bcd34d2 Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Tue, 5 Jul 2011 11:52:48 -0700 Subject: [PATCH 1/1] implement file path checking in indexed file format to delegate non indexed work to parent diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java index b9b586e..e4e9c62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java @@ -82,7 +82,7 @@ public class HiveIndexResult { BytesRefWritable[] bytesRef = new BytesRefWritable[2]; boolean ignoreHdfsLoc = false; - public HiveIndexResult(String indexFile, JobConf conf) throws IOException, + public HiveIndexResult(List indexFiles, JobConf conf) throws IOException, HiveException { job = conf; @@ -90,18 +90,20 @@ public class HiveIndexResult { bytesRef[1] = new BytesRefWritable(); ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC); - if (indexFile != null) { - Path indexFilePath = new Path(indexFile); + if (indexFiles != null && indexFiles.size() > 0) { FileSystem fs = FileSystem.get(conf); - FileStatus indexStat = fs.getFileStatus(indexFilePath); List paths = new ArrayList(); - if (indexStat.isDir()) { - FileStatus[] fss = fs.listStatus(indexFilePath); - for (FileStatus f : fss) { - paths.add(f.getPath()); + for (String indexFile : indexFiles) { + Path indexFilePath = new Path(indexFile); + FileStatus indexStat = fs.getFileStatus(indexFilePath); + if (indexStat.isDir()) { + FileStatus[] fss = fs.listStatus(indexFilePath); + for (FileStatus f : fss) { + paths.add(f.getPath()); + } + } else { + paths.add(indexFilePath); } - } else { - paths.add(indexFilePath); } long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java index f1ee95d..1a1ecd7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java @@ -20,12 +20,18 @@ package org.apache.hadoop.hive.ql.index; import java.io.IOException; import java.util.ArrayList; +import java.util.List; import java.util.Iterator; import java.util.Set; +import java.util.Map; +import java.util.Arrays; +import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -91,15 +97,27 @@ public class HiveIndexedInputFormat extends HiveInputFormat { return result.toArray(new HiveInputSplit[result.size()]); } + public static List getIndexFiles(String indexFileStr) { + // tokenize and store string of form (path,)+ + if (indexFileStr == null) { + return null; + } + String[] chunks = indexFileStr.split(","); + return Arrays.asList(chunks); + } + @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { String indexFileStr = job.get(indexFile); l4j.info("index_file is " + indexFileStr); + List indexFiles = getIndexFiles(indexFileStr); HiveIndexResult hiveIndexResult = null; - if (indexFileStr != null) { + if (indexFiles != null) { + boolean first = true; + StringBuilder newInputPaths = new StringBuilder(); try { - hiveIndexResult = new HiveIndexResult(indexFileStr, job); + hiveIndexResult = new HiveIndexResult(indexFiles, job); } catch (HiveException e) { l4j.error("Unable to read index.."); throw new IOException(e); @@ -107,8 +125,6 @@ public class HiveIndexedInputFormat extends HiveInputFormat { Set inputFiles = hiveIndexResult.buckets.keySet(); Iterator iter = inputFiles.iterator(); - boolean first = true; - StringBuilder newInputPaths = new StringBuilder(); while(iter.hasNext()) { String path = iter.next(); if (path.trim().equalsIgnoreCase("")) { @@ -121,7 +137,6 @@ public class HiveIndexedInputFormat extends HiveInputFormat { } newInputPaths.append(path); } - FileInputFormat.setInputPaths(job, newInputPaths.toString()); } else { return super.getSplits(job, numSplits); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java index 61bbbf5..8f185be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java @@ -81,14 +81,6 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { return; // abort if we couldn't pull out anything from the predicate } - // Build reentrant QL for index query - StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY "); - - String tmpFile = pctx.getContext().getMRTmpFileURI(); - qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name - qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM "); - qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM "); - List iqs = new ArrayList(indexes.size()); int i = 0; for (Index index : indexes) { @@ -100,6 +92,17 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { "ind" + i++)); } } + // setup TableScanOperator to change input format for original query + queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName()); + + // Build reentrant QL for index query + StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY "); + + String tmpFile = pctx.getContext().getMRTmpFileURI(); + qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name + qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM "); + qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM "); + BitmapQuery head = iqs.get(0); for ( i = 1; i < iqs.size(); i++) { @@ -113,10 +116,7 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { Driver driver = new Driver(pctx.getConf()); driver.compile(qlCommand.toString(), false); - // setup TableScanOperator to change input format for original query - queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName()); queryContext.setIndexIntermediateFile(tmpFile); - queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); queryContext.setQueryTasks(driver.getPlan().getRootTasks()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 7c91946..ff9c4cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -161,11 +161,14 @@ public class CompactIndexHandler extends TableBasedIndexHandler { // pass residual predicate back out for further processing queryContext.setResidualPredicate(decomposedPredicate.residualPredicate); + // setup TableScanOperator to change input format for original query + queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName()); // Build reentrant QL for index query StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY "); String tmpFile = pctx.getContext().getMRTmpFileURI(); + queryContext.setIndexIntermediateFile(tmpFile); qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name qlCommand.append("SELECT `_bucketname` , `_offsets` FROM "); qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName())); @@ -179,9 +182,6 @@ public class CompactIndexHandler extends TableBasedIndexHandler { Driver driver = new Driver(pctx.getConf()); driver.compile(qlCommand.toString(), false); - // setup TableScanOperator to change input format for original query - queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName()); - queryContext.setIndexIntermediateFile(tmpFile); queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); queryContext.setQueryTasks(driver.getPlan().getRootTasks()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java index dbc489f..8295687 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -83,11 +84,15 @@ public class IndexWhereProcessor implements NodeProcessor { TableScanOperator operator = (TableScanOperator) nd; List opChildren = operator.getChildren(); TableScanDesc operatorDesc = operator.getConf(); + if (operatorDesc == null) { + return null; + } ExprNodeDesc predicate = operatorDesc.getFilterExpr(); IndexWhereProcCtx context = (IndexWhereProcCtx) procCtx; ParseContext pctx = context.getParseContext(); LOG.info("Processing predicate for index optimization"); + if (predicate == null) { LOG.info("null predicate pushed down"); return null; @@ -114,47 +119,42 @@ public class IndexWhereProcessor implements NodeProcessor { // get potential reentrant index queries from each index Map queryContexts = new HashMap(); - Collection> tableIndexes = indexes.values(); - for (List indexesOnTable : tableIndexes) { - List> indexesByType = new ArrayList>(); - for (Index index : indexesOnTable) { - boolean added = false; - for (List indexType : indexesByType) { - if (indexType.isEmpty()) { - indexType.add(index); - added = true; - } else if (indexType.get(0).getIndexHandlerClass().equals( - index.getIndexHandlerClass())) { - indexType.add(index); - added = true; - break; - } - } - if (!added) { - List newType = new ArrayList(); - newType.add(index); - indexesByType.add(newType); - } + // make sure we have an index on the table being scanned + TableDesc tblDesc = operator.getTableDesc(); + Table srcTable = pctx.getTopToTable().get(operator); + if (indexes == null || indexes.get(srcTable) == null) { + return null; + } + + List tableIndexes = indexes.get(srcTable); + Map> indexesByType = new HashMap>(); + for (Index indexOnTable : tableIndexes) { + if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) { + List newType = new ArrayList(); + newType.add(indexOnTable); + indexesByType.put(indexOnTable.getIndexHandlerClass(), newType); + } else { + indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable); } + } - // choose index type with most indexes of the same type on the table - // TODO HIVE-2130 This would be a good place for some sort of cost based choice? - List bestIndexes = indexesByType.get(0); - for (List indexTypes : indexesByType) { - if (bestIndexes.size() < indexTypes.size()) { - bestIndexes = indexTypes; - } + // choose index type with most indexes of the same type on the table + // TODO HIVE-2130 This would be a good place for some sort of cost based choice? + List bestIndexes = indexesByType.values().iterator().next(); + for (List indexTypes : indexesByType.values()) { + if (bestIndexes.size() < indexTypes.size()) { + bestIndexes = indexTypes; } + } - // rewrite index queries for the chosen index type - HiveIndexQueryContext queryContext = new HiveIndexQueryContext(); - queryContext.setQueryPartitions(queryPartitions); - rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext); - List> indexTasks = queryContext.getQueryTasks(); + // rewrite index queries for the chosen index type + HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext(); + tmpQueryContext.setQueryPartitions(queryPartitions); + rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext); + List> indexTasks = tmpQueryContext.getQueryTasks(); - if (indexTasks != null && indexTasks.size() > 0) { - queryContexts.put(bestIndexes.get(0), queryContext); - } + if (indexTasks != null && indexTasks.size() > 0) { + queryContexts.put(bestIndexes.get(0), tmpQueryContext); } // choose an index rewrite to use if (queryContexts.size() > 0) { @@ -168,8 +168,7 @@ public class IndexWhereProcessor implements NodeProcessor { // prepare the map reduce job to use indexing MapredWork work = currentTask.getWork(); work.setInputformat(queryContext.getIndexInputFormat()); - work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile()); - + work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile()); // modify inputs based on index query Set inputs = pctx.getSemanticInputs(); inputs.addAll(queryContext.getAdditionalSemanticInputs()); @@ -226,8 +225,6 @@ public class IndexWhereProcessor implements NodeProcessor { return; } - - /** * Check the partitions used by the table scan to make sure they also exist in the * index table @@ -239,6 +236,7 @@ public class IndexWhereProcessor implements NodeProcessor { throws HiveException { Hive hive = Hive.get(pctx.getConf()); + // make sure each partition exists on the index table PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan); Set queryPartitions = queryPartitionList.getConfirmedPartns(); @@ -259,6 +257,9 @@ public class IndexWhereProcessor implements NodeProcessor { private List getIndexTables(Hive hive, Partition part) throws HiveException { List
indexTables = new ArrayList
(); Table partitionedTable = part.getTable(); + if (indexes == null || indexes.get(partitionedTable) == null) { + return indexTables; + } for (Index index : indexes.get(partitionedTable)) { indexTables.add(hive.getTable(index.getIndexTableName())); } @@ -276,6 +277,10 @@ public class IndexWhereProcessor implements NodeProcessor { return true; // empty specs come from non-partitioned tables } + if (indexTables == null || indexTables.size() == 0) { + return false; + } + for (Table indexTable : indexTables) { // get partitions that match the spec List matchingPartitions = hive.getPartitions(indexTable, partSpec); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java index da084f6..699519b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereTaskDispatcher.java @@ -45,6 +45,7 @@ import org.apache.hadoop.hive.ql.metadata.Table; import org.apache.hadoop.hive.ql.optimizer.physical.PhysicalContext; import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; +import org.apache.hadoop.hive.ql.plan.MapredWork; /** * @@ -87,10 +88,14 @@ public class IndexWhereTaskDispatcher implements Dispatcher { operatorRules, indexWhereOptimizeCtx); - // walk the mapper operator(not task) tree + // walk the mapper operator(not task) tree for each specific task GraphWalker ogw = new DefaultGraphWalker(dispatcher); ArrayList topNodes = new ArrayList(); - topNodes.addAll(pctx.getTopOps().values()); + if (task.getWork() instanceof MapredWork) { + topNodes.addAll(((MapredWork)task.getWork()).getAliasToWork().values()); + } else { + return null; + } ogw.startWalking(topNodes, null); return null; diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index a03a9a6..7a84e97 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -387,8 +387,12 @@ public class MapredWork implements Serializable { return indexIntermediateFile; } - public void setIndexIntermediateFile(String fileName) { - this.indexIntermediateFile = fileName; + public void addIndexIntermediateFile(String fileName) { + if (this.indexIntermediateFile == null) { + this.indexIntermediateFile = fileName; + } else { + this.indexIntermediateFile += "," + fileName; + } } public void setGatheringStats(boolean gatherStats) { diff --git ql/src/test/queries/clientpositive/index_auto_mult_tables.q ql/src/test/queries/clientpositive/index_auto_mult_tables.q new file mode 100644 index 0000000..0ae43e0 --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_mult_tables.q @@ -0,0 +1,23 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; + + +CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX srcpart_index ON srcpart REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; + +DROP INDEX src_index on src; +DROP INDEX srcpart_index on src; diff --git ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q new file mode 100644 index 0000000..808a04c --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q @@ -0,0 +1,23 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; + + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX srcpart_index ON srcpart REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; + +DROP INDEX src_index on src; +DROP INDEX srcpart_index on src; diff --git ql/src/test/queries/clientpositive/index_auto_self_join.q ql/src/test/queries/clientpositive/index_auto_self_join.q new file mode 100644 index 0000000..0b741e3 --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_self_join.q @@ -0,0 +1,18 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; +SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; + +CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; +SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key; + +DROP INDEX src_index on src; diff --git ql/src/test/results/clientpositive/index_auto_mult_tables.q.out ql/src/test/results/clientpositive/index_auto_mult_tables.q.out new file mode 100644 index 0000000..8cc9c3b --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -0,0 +1,656 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-57-25_282_6914613336680231589/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-57-25_479_8785736728375386316/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-57-25_479_8785736728375386316/-mr-10000 +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-4, Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + tmp_index:ind0:default__srcpart_srcpart_index__ + TableScan + alias: default__srcpart_srcpart_index__ + Filter Operator + predicate: + expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offset + type: bigint + expr: _bitmaps + type: array + outputColumnNames: _col1, _col2, _col3 + Filter Operator + predicate: + expr: (not EWAH_BITMAP_EMPTY(_col3)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: collect_set(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: array + Reduce Operator Tree: + Group By Operator + aggregations: + expr: collect_set(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-58-28_568_8754389781386510554/-mr-10003 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + filterExpr: + expr: ((key > 70) and (key < 90)) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-58-28_568_8754389781386510554/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + tmp_index:ind0:default__src_src_index__ + TableScan + alias: default__src_src_index__ + Filter Operator + predicate: + expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offset + type: bigint + expr: _bitmaps + type: array + outputColumnNames: _col1, _col2, _col3 + Filter Operator + predicate: + expr: (not EWAH_BITMAP_EMPTY(_col3)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: collect_set(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: array + Reduce Operator Tree: + Group By Operator + aggregations: + expr: collect_set(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-58-28_568_8754389781386510554/-mr-10004 + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-58-29_498_5104544146610457543/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-58-29_498_5104544146610457543/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP INDEX srcpart_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX srcpart_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out new file mode 100644 index 0000000..69899ba --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out @@ -0,0 +1,580 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-10-49_392_4516248091220190992/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-10-49_554_6164696431939596533/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-10-49_554_6164696431939596533/-mr-10000 +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-8 depends on stages: Stage-5 , consists of Stage-7, Stage-6 + Stage-7 + Stage-4 depends on stages: Stage-7, Stage-6 + Stage-1 depends on stages: Stage-4, Stage-9 + Stage-2 depends on stages: Stage-1 + Stage-6 + Stage-10 is a root stage + Stage-13 depends on stages: Stage-10 , consists of Stage-12, Stage-11 + Stage-12 + Stage-9 depends on stages: Stage-12, Stage-11 + Stage-11 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + default__srcpart_srcpart_index__ + TableScan + alias: default__srcpart_srcpart_index__ + filterExpr: + expr: ((key > 70) and (key < 90)) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-8 + Conditional Operator + + Stage: Stage-7 + Move Operator + files: + hdfs directory: true + destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-19_12-11-53_889_3781106201069873933/-ext-10000 + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-11-53_189_1308324465823418093/-mr-10003 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + filterExpr: + expr: ((key > 70) and (key < 90)) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-11-53_189_1308324465823418093/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-19_12-11-53_889_3781106201069873933/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-10 + Map Reduce + Alias -> Map Operator Tree: + default__src_src_index__ + TableScan + alias: default__src_src_index__ + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-13 + Conditional Operator + + Stage: Stage-12 + Move Operator + files: + hdfs directory: true + destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-19_12-11-53_997_8165369880816641277/-ext-10000 + + Stage: Stage-9 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-11-53_189_1308324465823418093/-mr-10004 + + Stage: Stage-11 + Map Reduce + Alias -> Map Operator Tree: + file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-19_12-11-53_997_8165369880816641277/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-11-54_127_6393806523582122331/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-19_12-11-54_127_6393806523582122331/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP INDEX srcpart_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX srcpart_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_auto_self_join.q.out ql/src/test/results/clientpositive/index_auto_self_join.q.out new file mode 100644 index 0000000..bff41a2 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_self_join.q.out @@ -0,0 +1,496 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 0 + value expressions: + expr: key + type: string + b + TableScan + alias: b + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-48-37_169_6128853157667138527/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-48-37_538_1936430561678404727/-mr-10000 +POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-48-37_538_1936430561678404727/-mr-10000 +82 82 +83 83 +83 83 +83 83 +83 83 +84 84 +84 84 +84 84 +84 84 +85 85 +86 86 +87 87 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90))) (TOK_ORDERBY (TOK_TABSORTCOLNAMEASC (. (TOK_TABLE_OR_COL a) key))))) + +STAGE DEPENDENCIES: + Stage-5 is a root stage + Stage-4 depends on stages: Stage-5 + Stage-1 depends on stages: Stage-4, Stage-6 + Stage-2 depends on stages: Stage-1 + Stage-7 is a root stage + Stage-6 depends on stages: Stage-7 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + tmp_index:ind0:default__src_src_index__ + TableScan + alias: default__src_src_index__ + Filter Operator + predicate: + expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offset + type: bigint + expr: _bitmaps + type: array + outputColumnNames: _col1, _col2, _col3 + Filter Operator + predicate: + expr: (not EWAH_BITMAP_EMPTY(_col3)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: collect_set(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: array + Reduce Operator Tree: + Group By Operator + aggregations: + expr: collect_set(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-4 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-49-01_083_3406432687803338424/-mr-10003 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 0 + value expressions: + expr: key + type: string + b + TableScan + alias: b + filterExpr: + expr: ((key > 70) and (key < 90)) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: value + type: string + sort order: + + Map-reduce partition columns: + expr: value + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col4 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + + Stage: Stage-2 + Map Reduce + Alias -> Map Operator Tree: + file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-49-01_083_3406432687803338424/-mr-10002 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + tag: -1 + value expressions: + expr: _col0 + type: string + expr: _col1 + type: string + Reduce Operator Tree: + Extract + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-7 + Map Reduce + Alias -> Map Operator Tree: + tmp_index:ind0:default__src_src_index__ + TableScan + alias: default__src_src_index__ + Filter Operator + predicate: + expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offset + type: bigint + expr: _bitmaps + type: array + outputColumnNames: _col1, _col2, _col3 + Filter Operator + predicate: + expr: (not EWAH_BITMAP_EMPTY(_col3)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: collect_set(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: array + Reduce Operator Tree: + Group By Operator + aggregations: + expr: collect_set(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-49-01_083_3406432687803338424/-mr-10004 + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@src +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-49-01_661_550203989096880097/-mr-10000 +POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 ORDER BY a.key +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@src +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_15-49-01_661_550203989096880097/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 82 +83 83 +83 83 +83 83 +83 83 +84 84 +84 84 +84 84 +84 84 +85 85 +86 86 +87 87 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out index 4c9efd1..cf1f420 100644 --- ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out +++ ql/src/test/results/clientpositive/index_bitmap_auto_partitioned.q.out @@ -52,9 +52,6 @@ STAGE PLANS: tmp_index:ind0:default__srcpart_src_part_index__ TableScan alias: default__srcpart_src_part_index__ - filterExpr: - expr: ((key = 86) and (not EWAH_BITMAP_EMPTY(_bitmaps))) - type: boolean Filter Operator predicate: expr: ((key = 86) and (not EWAH_BITMAP_EMPTY(_bitmaps))) @@ -139,7 +136,7 @@ STAGE PLANS: Move Operator files: hdfs directory: true - destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-15_16-26-39_938_2166512808072912128/-mr-10002 + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_16-10-48_357_8499417362820521307/-mr-10002 Stage: Stage-1 Map Reduce @@ -196,14 +193,14 @@ PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 PREHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-15_16-26-40_311_5688266146614431290/-mr-10000 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_16-10-48_702_3053293858148124582/-mr-10000 POSTHOOK: query: SELECT key, value FROM srcpart WHERE key=86 AND ds='2008-04-09' ORDER BY key POSTHOOK: type: QUERY POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=11 POSTHOOK: Input: default@default__srcpart_src_part_index__@ds=2008-04-09/hr=12 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 -POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-15_16-26-40_311_5688266146614431290/-mr-10000 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-21_16-10-48_702_3053293858148124582/-mr-10000 POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] POSTHOOK: Lineage: default__srcpart_src_part_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] -- 1.7.4.4