From 8e210c7e56cb5c06c58dac7c0eff185e3324b3e8 Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Tue, 5 Jul 2011 11:52:48 -0700 Subject: [PATCH 1/1] implement file path checking in indexed file format to delegate non indexed work to parent diff --git ql/src/java/org/apache/hadoop/hive/ql/Driver.java ql/src/java/org/apache/hadoop/hive/ql/Driver.java index b278ffe..9cd290a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/Driver.java +++ ql/src/java/org/apache/hadoop/hive/ql/Driver.java @@ -95,6 +95,7 @@ import org.apache.hadoop.hive.ql.parse.SemanticAnalyzerFactory; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.parse.VariableSubstitution; import org.apache.hadoop.hive.ql.plan.HiveOperation; +import org.apache.hadoop.hive.ql.plan.MapredWork; import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.processors.CommandProcessor; import org.apache.hadoop.hive.ql.processors.CommandProcessorResponse; diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java index 617723e..5a785b5 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java @@ -38,7 +38,6 @@ public class HiveIndexQueryContext { // merging the index query tasks private String indexInputFormat; // input format to set on the TableScanOperator to activate indexing private String indexIntermediateFile; // name of intermediate file written by the index query for the - // TableScanOperator to use private List> queryTasks; // list of tasks that will execute the index query and write // results to a temporary file private ExprNodeDesc residualPredicate; // predicate that could not be processed by an index handler @@ -72,6 +71,7 @@ public class HiveIndexQueryContext { public String getIndexIntermediateFile() { return indexIntermediateFile; } + public void setIndexIntermediateFile(String indexIntermediateFile) { this.indexIntermediateFile = indexIntermediateFile; } diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java index b9b586e..e4e9c62 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexResult.java @@ -82,7 +82,7 @@ public class HiveIndexResult { BytesRefWritable[] bytesRef = new BytesRefWritable[2]; boolean ignoreHdfsLoc = false; - public HiveIndexResult(String indexFile, JobConf conf) throws IOException, + public HiveIndexResult(List indexFiles, JobConf conf) throws IOException, HiveException { job = conf; @@ -90,18 +90,20 @@ public class HiveIndexResult { bytesRef[1] = new BytesRefWritable(); ignoreHdfsLoc = HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_INDEX_IGNORE_HDFS_LOC); - if (indexFile != null) { - Path indexFilePath = new Path(indexFile); + if (indexFiles != null && indexFiles.size() > 0) { FileSystem fs = FileSystem.get(conf); - FileStatus indexStat = fs.getFileStatus(indexFilePath); List paths = new ArrayList(); - if (indexStat.isDir()) { - FileStatus[] fss = fs.listStatus(indexFilePath); - for (FileStatus f : fss) { - paths.add(f.getPath()); + for (String indexFile : indexFiles) { + Path indexFilePath = new Path(indexFile); + FileStatus indexStat = fs.getFileStatus(indexFilePath); + if (indexStat.isDir()) { + FileStatus[] fss = fs.listStatus(indexFilePath); + for (FileStatus f : fss) { + 
paths.add(f.getPath()); + } + } else { + paths.add(indexFilePath); } - } else { - paths.add(indexFilePath); } long maxEntriesToLoad = HiveConf.getLongVar(conf, HiveConf.ConfVars.HIVE_INDEX_COMPACT_QUERY_MAX_ENTRIES); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java index f1ee95d..1a1ecd7 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java @@ -20,12 +20,18 @@ package org.apache.hadoop.hive.ql.index; import java.io.IOException; import java.util.ArrayList; +import java.util.List; import java.util.Iterator; import java.util.Set; +import java.util.Map; +import java.util.Arrays; +import java.util.HashMap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.fs.Path; +import org.apache.hadoop.fs.FileStatus; +import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.conf.HiveConf.ConfVars; import org.apache.hadoop.hive.ql.exec.Utilities; @@ -91,15 +97,27 @@ public class HiveIndexedInputFormat extends HiveInputFormat { return result.toArray(new HiveInputSplit[result.size()]); } + public static List getIndexFiles(String indexFileStr) { + // tokenize and store string of form (path,)+ + if (indexFileStr == null) { + return null; + } + String[] chunks = indexFileStr.split(","); + return Arrays.asList(chunks); + } + @Override public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { String indexFileStr = job.get(indexFile); l4j.info("index_file is " + indexFileStr); + List indexFiles = getIndexFiles(indexFileStr); HiveIndexResult hiveIndexResult = null; - if (indexFileStr != null) { + if (indexFiles != null) { + boolean first = true; + StringBuilder newInputPaths = new StringBuilder(); try { - hiveIndexResult = new HiveIndexResult(indexFileStr, job); + hiveIndexResult = new HiveIndexResult(indexFiles, job); } catch (HiveException e) { l4j.error("Unable to read index.."); throw new IOException(e); @@ -107,8 +125,6 @@ public class HiveIndexedInputFormat extends HiveInputFormat { Set inputFiles = hiveIndexResult.buckets.keySet(); Iterator iter = inputFiles.iterator(); - boolean first = true; - StringBuilder newInputPaths = new StringBuilder(); while(iter.hasNext()) { String path = iter.next(); if (path.trim().equalsIgnoreCase("")) { @@ -121,7 +137,6 @@ public class HiveIndexedInputFormat extends HiveInputFormat { } newInputPaths.append(path); } - FileInputFormat.setInputPaths(job, newInputPaths.toString()); } else { return super.getSplits(job, numSplits); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java index 02ab78c..0ced7c3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java @@ -28,15 +28,18 @@ import java.util.Map.Entry; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.metastore.api.FieldSchema; import org.apache.hadoop.hive.ql.exec.Task; +import org.apache.hadoop.hive.ql.exec.TableScanOperator; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.hooks.ReadEntity; import org.apache.hadoop.hive.ql.hooks.WriteEntity; import org.apache.hadoop.hive.ql.metadata.HiveException; import 
org.apache.hadoop.hive.ql.metadata.HiveUtils; import org.apache.hadoop.hive.ql.metadata.Partition; +import org.apache.hadoop.hive.ql.parse.ParseContext; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.PartitionDesc; import org.apache.hadoop.hive.ql.plan.TableDesc; +import org.apache.hadoop.hive.ql.metadata.Table; /** * Index handler for indexes that use tables to store indexes. diff --git ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java index 61bbbf5..8f185be 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java @@ -81,14 +81,6 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { return; // abort if we couldn't pull out anything from the predicate } - // Build reentrant QL for index query - StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY "); - - String tmpFile = pctx.getContext().getMRTmpFileURI(); - qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name - qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM "); - qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM "); - List iqs = new ArrayList(indexes.size()); int i = 0; for (Index index : indexes) { @@ -100,6 +92,17 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { "ind" + i++)); } } + // setup TableScanOperator to change input format for original query + queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName()); + + // Build reentrant QL for index query + StringBuilder qlCommand = new StringBuilder("INSERT OVERWRITE DIRECTORY "); + + String tmpFile = pctx.getContext().getMRTmpFileURI(); + qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name + qlCommand.append("SELECT bucketname AS `_bucketname` , COLLECT_SET(offset) AS `_offsets` FROM "); + qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM "); + BitmapQuery head = iqs.get(0); for ( i = 1; i < iqs.size(); i++) { @@ -113,10 +116,7 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { Driver driver = new Driver(pctx.getConf()); driver.compile(qlCommand.toString(), false); - // setup TableScanOperator to change input format for original query - queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName()); queryContext.setIndexIntermediateFile(tmpFile); - queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); queryContext.setQueryTasks(driver.getPlan().getRootTasks()); } diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 7c91946..ff9c4cc 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -161,11 +161,14 @@ public class CompactIndexHandler extends TableBasedIndexHandler { // pass residual predicate back out for further processing queryContext.setResidualPredicate(decomposedPredicate.residualPredicate); + // setup TableScanOperator to change input format for original query + queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName()); // Build reentrant QL for index query StringBuilder qlCommand = new StringBuilder("INSERT 
OVERWRITE DIRECTORY "); String tmpFile = pctx.getContext().getMRTmpFileURI(); + queryContext.setIndexIntermediateFile(tmpFile); qlCommand.append( "\"" + tmpFile + "\" "); // QL includes " around file name qlCommand.append("SELECT `_bucketname` , `_offsets` FROM "); qlCommand.append(HiveUtils.unparseIdentifier(index.getIndexTableName())); @@ -179,9 +182,6 @@ public class CompactIndexHandler extends TableBasedIndexHandler { Driver driver = new Driver(pctx.getConf()); driver.compile(qlCommand.toString(), false); - // setup TableScanOperator to change input format for original query - queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName()); - queryContext.setIndexIntermediateFile(tmpFile); queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); queryContext.setQueryTasks(driver.getPlan().getRootTasks()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java index dbc489f..08d2afa 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java @@ -53,6 +53,7 @@ import org.apache.hadoop.hive.ql.parse.PrunedPartitionList; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.ql.plan.ExprNodeDesc; import org.apache.hadoop.hive.ql.plan.FilterDesc; +import org.apache.hadoop.hive.ql.plan.TableDesc; import org.apache.hadoop.hive.ql.plan.TableScanDesc; import org.apache.hadoop.hive.ql.plan.MapredWork; @@ -114,47 +115,42 @@ public class IndexWhereProcessor implements NodeProcessor { // get potential reentrant index queries from each index Map queryContexts = new HashMap(); - Collection> tableIndexes = indexes.values(); - for (List indexesOnTable : tableIndexes) { - List> indexesByType = new ArrayList>(); - for (Index index : indexesOnTable) { - boolean added = false; - for (List indexType : indexesByType) { - if (indexType.isEmpty()) { - indexType.add(index); - added = true; - } else if (indexType.get(0).getIndexHandlerClass().equals( - index.getIndexHandlerClass())) { - indexType.add(index); - added = true; - break; - } - } - if (!added) { - List newType = new ArrayList(); - newType.add(index); - indexesByType.add(newType); - } + // make sure we have an index on the table being scanned + TableDesc tblDesc = operator.getTableDesc(); + Table srcTable = pctx.getTopToTable().get(operator); + if (indexes == null || indexes.get(srcTable) == null) { + return null; + } + + List tableIndexes = indexes.get(srcTable); + Map> indexesByType = new HashMap>(); + for (Index indexOnTable : tableIndexes) { + if (indexesByType.get(indexOnTable.getIndexHandlerClass()) == null) { + List newType = new ArrayList(); + newType.add(indexOnTable); + indexesByType.put(indexOnTable.getIndexHandlerClass(), newType); + } else { + indexesByType.get(indexOnTable.getIndexHandlerClass()).add(indexOnTable); } + } - // choose index type with most indexes of the same type on the table - // TODO HIVE-2130 This would be a good place for some sort of cost based choice? - List bestIndexes = indexesByType.get(0); - for (List indexTypes : indexesByType) { - if (bestIndexes.size() < indexTypes.size()) { - bestIndexes = indexTypes; - } + // choose index type with most indexes of the same type on the table + // TODO HIVE-2130 This would be a good place for some sort of cost based choice? 
+ List bestIndexes = indexesByType.values().iterator().next(); + for (List indexTypes : indexesByType.values()) { + if (bestIndexes.size() < indexTypes.size()) { + bestIndexes = indexTypes; } + } - // rewrite index queries for the chosen index type - HiveIndexQueryContext queryContext = new HiveIndexQueryContext(); - queryContext.setQueryPartitions(queryPartitions); - rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, queryContext); - List> indexTasks = queryContext.getQueryTasks(); + // rewrite index queries for the chosen index type + HiveIndexQueryContext tmpQueryContext = new HiveIndexQueryContext(); + tmpQueryContext.setQueryPartitions(queryPartitions); + rewriteForIndexes(predicate, bestIndexes, pctx, currentTask, tmpQueryContext); + List> indexTasks = tmpQueryContext.getQueryTasks(); - if (indexTasks != null && indexTasks.size() > 0) { - queryContexts.put(bestIndexes.get(0), queryContext); - } + if (indexTasks != null && indexTasks.size() > 0) { + queryContexts.put(bestIndexes.get(0), tmpQueryContext); } // choose an index rewrite to use if (queryContexts.size() > 0) { @@ -168,8 +164,7 @@ public class IndexWhereProcessor implements NodeProcessor { // prepare the map reduce job to use indexing MapredWork work = currentTask.getWork(); work.setInputformat(queryContext.getIndexInputFormat()); - work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile()); - + work.addIndexIntermediateFile(queryContext.getIndexIntermediateFile()); // modify inputs based on index query Set inputs = pctx.getSemanticInputs(); inputs.addAll(queryContext.getAdditionalSemanticInputs()); @@ -226,8 +221,6 @@ public class IndexWhereProcessor implements NodeProcessor { return; } - - /** * Check the partitions used by the table scan to make sure they also exist in the * index table @@ -239,6 +232,7 @@ public class IndexWhereProcessor implements NodeProcessor { throws HiveException { Hive hive = Hive.get(pctx.getConf()); + // make sure each partition exists on the index table PrunedPartitionList queryPartitionList = pctx.getOpToPartList().get(tableScan); Set queryPartitions = queryPartitionList.getConfirmedPartns(); @@ -259,6 +253,9 @@ public class IndexWhereProcessor implements NodeProcessor { private List getIndexTables(Hive hive, Partition part) throws HiveException { List
<Table> indexTables = new ArrayList<Table>
(); Table partitionedTable = part.getTable(); + if (indexes == null || indexes.get(partitionedTable) == null) { + return indexTables; + } for (Index index : indexes.get(partitionedTable)) { indexTables.add(hive.getTable(index.getIndexTableName())); } @@ -276,6 +273,10 @@ public class IndexWhereProcessor implements NodeProcessor { return true; // empty specs come from non-partitioned tables } + if (indexTables == null || indexTables.size() == 0) { + return false; + } + for (Table indexTable : indexTables) { // get partitions that match the spec List matchingPartitions = hive.getPartitions(indexTable, partSpec); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index a03a9a6..7a84e97 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -387,8 +387,12 @@ public class MapredWork implements Serializable { return indexIntermediateFile; } - public void setIndexIntermediateFile(String fileName) { - this.indexIntermediateFile = fileName; + public void addIndexIntermediateFile(String fileName) { + if (this.indexIntermediateFile == null) { + this.indexIntermediateFile = fileName; + } else { + this.indexIntermediateFile += "," + fileName; + } } public void setGatheringStats(boolean gatherStats) { diff --git ql/src/test/queries/clientpositive/index_auto_mult_tables.q ql/src/test/queries/clientpositive/index_auto_mult_tables.q new file mode 100644 index 0000000..33cc1db --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_mult_tables.q @@ -0,0 +1,23 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; + + +CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX srcpart_index ON srcpart REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; + +DROP INDEX src_index on src; +DROP INDEX srcpart_index on src; diff --git ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q new file mode 100644 index 0000000..48b46c4 --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_mult_tables_compact.q @@ -0,0 +1,23 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; + + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; 
+ALTER INDEX src_index ON src REBUILD; + +CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX srcpart_index ON srcpart REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; + +DROP INDEX src_index on src; +DROP INDEX srcpart_index on src; diff --git ql/src/test/queries/clientpositive/index_auto_self_join.q ql/src/test/queries/clientpositive/index_auto_self_join.q new file mode 100644 index 0000000..94585bc --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_self_join.q @@ -0,0 +1,19 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; +SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; + + +CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; +SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90; + +DROP INDEX src_index on src; diff --git ql/src/test/results/clientpositive/index_auto_mult_tables.q.out ql/src/test/results/clientpositive/index_auto_mult_tables.q.out new file mode 100644 index 0000000..3c92266 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -0,0 +1,612 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. 
(TOK_TABLE_OR_COL b) key) 90))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-07_14-22-32_395_4888951281399520769/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-07_14-22-32_395_4888951281399520769/-mr-10000 +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src 
+PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: 
default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. 
(TOK_TABLE_OR_COL b) key) 90))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-3 depends on stages: Stage-4 + Stage-1 depends on stages: Stage-3, Stage-5 + Stage-6 is a root stage + Stage-5 depends on stages: Stage-6 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + tmp_index:ind0:default__srcpart_srcpart_index__ + TableScan + alias: default__srcpart_srcpart_index__ + filterExpr: + expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offset + type: bigint + expr: _bitmaps + type: array + outputColumnNames: _col1, _col2, _col3 + Filter Operator + predicate: + expr: (not EWAH_BITMAP_EMPTY(_col3)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: collect_set(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: array + Reduce Operator Tree: + Group By Operator + aggregations: + expr: collect_set(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-07_14-23-29_050_4478039713625360996/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + filterExpr: + expr: ((key > 70) and (key < 90)) + type: boolean + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + 
type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-6 + Map Reduce + Alias -> Map Operator Tree: + tmp_index:ind0:default__src_src_index__ + TableScan + alias: default__src_src_index__ + filterExpr: + expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps))) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offset + type: bigint + expr: _bitmaps + type: array + outputColumnNames: _col1, _col2, _col3 + Filter Operator + predicate: + expr: (not EWAH_BITMAP_EMPTY(_col3)) + type: boolean + Select Operator + expressions: + expr: _col1 + type: string + expr: _col2 + type: bigint + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: bigint + outputColumnNames: _col0, _col1 + Group By Operator + aggregations: + expr: collect_set(_col1) + bucketGroup: false + keys: + expr: _col0 + type: string + mode: hash + outputColumnNames: _col0, _col1 + Reduce Output Operator + key expressions: + expr: _col0 + type: string + sort order: + + Map-reduce partition columns: + expr: _col0 + type: string + tag: -1 + value expressions: + expr: _col1 + type: array + Reduce Operator Tree: + Group By Operator + aggregations: + expr: collect_set(VALUE._col0) + bucketGroup: false + keys: + expr: KEY._col0 + type: string + mode: mergepartial + outputColumnNames: _col0, _col1 + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-5 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-07_14-23-29_050_4478039713625360996/-mr-10003 + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-07_14-23-29_929_2234583197048673956/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: 
type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12 +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-07_14-23-29_929_2234583197048673956/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname 
SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP INDEX srcpart_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX srcpart_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bitmaps EXPRESSION [(srcpart)srcpart.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offset SIMPLE [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ] diff --git ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out new file mode 100644 index 0000000..0fa3ef3 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_mult_tables_compact.q.out @@ -0,0 +1,530 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. 
(TOK_TABLE_OR_COL b) key) 90))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + Filter Operator + predicate: + expr: ((key > 70) and (key < 90)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_15-50-43_145_6165633419060166689/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_15-50-43_145_6165633419060166689/-mr-10000 +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src 
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX srcpart_index ON TABLE srcpart(key) as 'COMPACT' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: query: ALTER INDEX srcpart_index ON srcpart REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Output: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90)))))
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-7 depends on stages: Stage-4 , consists of Stage-6, Stage-5
+  Stage-6
+  Stage-3 depends on stages: Stage-6, Stage-5
+  Stage-1 depends on stages: Stage-3, Stage-8
+  Stage-5
+  Stage-9 is a root stage
+  Stage-12 depends on stages: Stage-9 , consists of Stage-11, Stage-10
+  Stage-11
+  Stage-8 depends on stages: Stage-11, Stage-10
+  Stage-10
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__srcpart_srcpart_index__
+          TableScan
+            alias: default__srcpart_srcpart_index__
+            filterExpr:
+                expr: ((key > 70) and (key < 90))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 70) and (key < 90))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offsets
+                        type: array<bigint>
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-7
+    Conditional Operator
+
+  Stage: Stage-6
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-12_15-51-44_030_2872015902549044636/-ext-10000
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_15-51-42_889_5714971053768287766/-mr-10002
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            filterExpr:
+                expr: ((key > 80) and (key < 100))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+                      expr: value
+                      type: string
+        b
+          TableScan
+            alias: b
+            filterExpr:
+                expr: ((key > 70) and (key < 90))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: key
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: key
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0} {VALUE._col1}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col1, _col4
+          Filter Operator
+            predicate:
+                expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90))
+                type: boolean
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col1
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-5
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-12_15-51-44_030_2872015902549044636/-ext-10001
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-9
+    Map Reduce
+      Alias -> Map Operator Tree:
+        default__src_src_index__
+          TableScan
+            alias: default__src_src_index__
+            filterExpr:
+                expr: ((key > 80) and (key < 100))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 80) and (key < 100))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offsets
+                        type: array<bigint>
+                  outputColumnNames: _col0, _col1
+                  File Output Operator
+                    compressed: false
+                    GlobalTableId: 1
+                    table:
+                        input format: org.apache.hadoop.mapred.TextInputFormat
+                        output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-12
+    Conditional Operator
+
+  Stage: Stage-11
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-12_15-51-44_219_8031121985180529805/-ext-10000
+
+  Stage: Stage-8
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_15-51-42_889_5714971053768287766/-mr-10003
+
+  Stage: Stage-10
+    Map Reduce
+      Alias -> Map Operator Tree:
+        file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-07-12_15-51-44_219_8031121985180529805/-ext-10001
+            File Output Operator
+              compressed: false
+              GlobalTableId: 0
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+PREHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+PREHOOK: Input: default@src
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_15-51-44_373_639312906006459580/-mr-10000
+POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@default__srcpart_srcpart_index__@ds=2008-04-09/hr=12
+POSTHOOK: Input: default@src
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11
+POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_15-51-44_373_639312906006459580/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+82	val_82
+82	val_82
+82	val_82
+82	val_82
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+83	val_83
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+84	val_84
+85	val_85
+85	val_85
+85	val_85
+85	val_85
+86	val_86
+86	val_86
+86	val_86
+86	val_86
+87	val_87
+87	val_87
+87	val_87
+87	val_87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: DROP INDEX srcpart_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX srcpart_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._bucketname SIMPLE [(srcpart)srcpart.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12)._offsets EXPRESSION [(srcpart)srcpart.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__srcpart_srcpart_index__ PARTITION(ds=2008-04-09,hr=12).key SIMPLE [(srcpart)srcpart.FieldSchema(name:key, type:string, comment:default), ]
diff --git ql/src/test/results/clientpositive/index_auto_self_join.q.out ql/src/test/results/clientpositive/index_auto_self_join.q.out
new file mode 100644
index 0000000..72dd746
--- /dev/null
+++ ql/src/test/results/clientpositive/index_auto_self_join.q.out
@@ -0,0 +1,452 @@
+PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+PREHOOK: type: QUERY
+POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing
+
+-- without indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+POSTHOOK: type: QUERY
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90)))))
+
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: value
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: value
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+        b
+          TableScan
+            alias: b
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: value
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: value
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col4
+          Filter Operator
+            predicate:
+                expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90))
+                type: boolean
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col4
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_16-26-07_595_5304132428093950776/-mr-10000
+POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_16-26-07_595_5304132428093950776/-mr-10000
+82	82
+83	83
+83	83
+83	83
+83	83
+84	84
+84	84
+84	84
+84	84
+85	85
+86	86
+87	87
+PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+PREHOOK: type: CREATEINDEX
+POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD
+POSTHOOK: type: CREATEINDEX
+PREHOOK: query: ALTER INDEX src_index ON src REBUILD
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@default__src_src_index__
+POSTHOOK: query: ALTER INDEX src_index ON src REBUILD
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@default__src_src_index__
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+PREHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+PREHOOK: type: QUERY
+POSTHOOK: query: -- automatic indexing
+EXPLAIN SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+ABSTRACT SYNTAX TREE:
+  (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME src) b) (= (. (TOK_TABLE_OR_COL a) value) (. (TOK_TABLE_OR_COL b) value)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL b) key))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 70)) (< (. (TOK_TABLE_OR_COL b) key) 90)))))
+
+STAGE DEPENDENCIES:
+  Stage-4 is a root stage
+  Stage-3 depends on stages: Stage-4
+  Stage-1 depends on stages: Stage-3, Stage-5
+  Stage-6 is a root stage
+  Stage-5 depends on stages: Stage-6
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-4
+    Map Reduce
+      Alias -> Map Operator Tree:
+        tmp_index:ind0:default__src_src_index__
+          TableScan
+            alias: default__src_src_index__
+            filterExpr:
+                expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (((key > 70) and (key < 90)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 70) and (key < 90))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offset
+                        type: bigint
+                        expr: _bitmaps
+                        type: array<bigint>
+                  outputColumnNames: _col1, _col2, _col3
+                  Filter Operator
+                    predicate:
+                        expr: (not EWAH_BITMAP_EMPTY(_col3))
+                        type: boolean
+                    Select Operator
+                      expressions:
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: bigint
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: string
+                              expr: _col1
+                              type: bigint
+                        outputColumnNames: _col0, _col1
+                        Group By Operator
+                          aggregations:
+                                expr: collect_set(_col1)
+                          bucketGroup: false
+                          keys:
+                                expr: _col0
+                                type: string
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions:
+                                  expr: _col0
+                                  type: string
+                            sort order: +
+                            Map-reduce partition columns:
+                                  expr: _col0
+                                  type: string
+                            tag: -1
+                            value expressions:
+                                  expr: _col1
+                                  type: array<bigint>
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: collect_set(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: array<bigint>
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-3
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_16-26-25_010_7467783917657268663/-mr-10002
+
+  Stage: Stage-1
+    Map Reduce
+      Alias -> Map Operator Tree:
+        a
+          TableScan
+            alias: a
+            filterExpr:
+                expr: ((key > 80) and (key < 100))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 80) and (key < 100))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: value
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: value
+                      type: string
+                tag: 0
+                value expressions:
+                      expr: key
+                      type: string
+        b
+          TableScan
+            alias: b
+            filterExpr:
+                expr: ((key > 70) and (key < 90))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: ((key > 70) and (key < 90))
+                  type: boolean
+              Reduce Output Operator
+                key expressions:
+                      expr: value
+                      type: string
+                sort order: +
+                Map-reduce partition columns:
+                      expr: value
+                      type: string
+                tag: 1
+                value expressions:
+                      expr: key
+                      type: string
+      Reduce Operator Tree:
+        Join Operator
+          condition map:
+               Inner Join 0 to 1
+          condition expressions:
+            0 {VALUE._col0}
+            1 {VALUE._col0}
+          handleSkewJoin: false
+          outputColumnNames: _col0, _col4
+          Filter Operator
+            predicate:
+                expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 70)) and (_col4 < 90))
+                type: boolean
+            Select Operator
+              expressions:
+                    expr: _col0
+                    type: string
+                    expr: _col4
+                    type: string
+              outputColumnNames: _col0, _col1
+              File Output Operator
+                compressed: false
+                GlobalTableId: 0
+                table:
+                    input format: org.apache.hadoop.mapred.TextInputFormat
+                    output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-6
+    Map Reduce
+      Alias -> Map Operator Tree:
+        tmp_index:ind0:default__src_src_index__
+          TableScan
+            alias: default__src_src_index__
+            filterExpr:
+                expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                type: boolean
+            Filter Operator
+              predicate:
+                  expr: (((key > 80) and (key < 100)) and (not EWAH_BITMAP_EMPTY(_bitmaps)))
+                  type: boolean
+              Filter Operator
+                predicate:
+                    expr: ((key > 80) and (key < 100))
+                    type: boolean
+                Select Operator
+                  expressions:
+                        expr: _bucketname
+                        type: string
+                        expr: _offset
+                        type: bigint
+                        expr: _bitmaps
+                        type: array<bigint>
+                  outputColumnNames: _col1, _col2, _col3
+                  Filter Operator
+                    predicate:
+                        expr: (not EWAH_BITMAP_EMPTY(_col3))
+                        type: boolean
+                    Select Operator
+                      expressions:
+                            expr: _col1
+                            type: string
+                            expr: _col2
+                            type: bigint
+                      outputColumnNames: _col0, _col1
+                      Select Operator
+                        expressions:
+                              expr: _col0
+                              type: string
+                              expr: _col1
+                              type: bigint
+                        outputColumnNames: _col0, _col1
+                        Group By Operator
+                          aggregations:
+                                expr: collect_set(_col1)
+                          bucketGroup: false
+                          keys:
+                                expr: _col0
+                                type: string
+                          mode: hash
+                          outputColumnNames: _col0, _col1
+                          Reduce Output Operator
+                            key expressions:
+                                  expr: _col0
+                                  type: string
+                            sort order: +
+                            Map-reduce partition columns:
+                                  expr: _col0
+                                  type: string
+                            tag: -1
+                            value expressions:
+                                  expr: _col1
+                                  type: array<bigint>
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations:
+                expr: collect_set(VALUE._col0)
+          bucketGroup: false
+          keys:
+                expr: KEY._col0
+                type: string
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Select Operator
+            expressions:
+                  expr: _col0
+                  type: string
+                  expr: _col1
+                  type: array<bigint>
+            outputColumnNames: _col0, _col1
+            File Output Operator
+              compressed: false
+              GlobalTableId: 1
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+
+  Stage: Stage-5
+    Move Operator
+      files:
+          hdfs directory: true
+          destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_16-26-25_010_7467783917657268663/-mr-10003
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+
+
+PREHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+PREHOOK: type: QUERY
+PREHOOK: Input: default@default__src_src_index__
+PREHOOK: Input: default@src
+PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_16-26-25_462_7617998484576104743/-mr-10000
+POSTHOOK: query: SELECT a.key, b.key FROM src a JOIN src b ON (a.value = b.value) WHERE a.key > 80 AND a.key < 100 AND b.key > 70 AND b.key < 90
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@default__src_src_index__
+POSTHOOK: Input: default@src
+POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-07-12_16-26-25_462_7617998484576104743/-mr-10000
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+82	82
+83	83
+83	83
+83	83
+83	83
+84	84
+84	84
+84	84
+84	84
+85	85
+86	86
+87	87
+PREHOOK: query: DROP INDEX src_index on src
+PREHOOK: type: DROPINDEX
+POSTHOOK: query: DROP INDEX src_index on src
+POSTHOOK: type: DROPINDEX
+POSTHOOK: Lineage: default__src_src_index__._bitmaps EXPRESSION [(src)src.FieldSchema(name:ROW__OFFSET__INSIDE__BLOCK, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__._offset SIMPLE [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ]
+POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
-- 
1.7.4.4