From 961fecfd220ca94157d8d68e3857c1ff3dc6fb2c Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Tue, 5 Jul 2011 11:52:48 -0700 Subject: [PATCH 1/1] implement file path checking in indexed file format to delegate non indexed work to parent diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java index 090ecfc..f3ac692 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/ExecDriver.java @@ -483,6 +483,10 @@ public class ExecDriver extends Task implements Serializable, Hadoop conf.set("hive.index.compact.file", work.getIndexIntermediateFile()); conf.set("hive.index.blockfilter.file", work.getIndexIntermediateFile()); } + + if (work.getIndexedPaths() != null) { + conf.set("hive.index.blockfilter.tables", work.getIndexedPaths()); + } } public boolean mapStarted() { diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java index 617723e..668e63e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexQueryContext.java @@ -38,6 +38,7 @@ public class HiveIndexQueryContext { // merging the index query tasks private String indexInputFormat; // input format to set on the TableScanOperator to activate indexing private String indexIntermediateFile; // name of intermediate file written by the index query for the + private List filteredTableNames; // List of table names that have an index filter applied // TableScanOperator to use private List> queryTasks; // list of tasks that will execute the index query and write // results to a temporary file @@ -65,13 +66,19 @@ public class HiveIndexQueryContext { public String getIndexInputFormat() { return indexInputFormat; } - public void setIndexInputFormat(String indexInputFormat) { + public void setIndexInputFormat(String 
indexInputFormat, List origTableNames) { this.indexInputFormat = indexInputFormat; + this.filteredTableNames = origTableNames; } public String getIndexIntermediateFile() { return indexIntermediateFile; } + + public List getFilteredTables() { + return this.filteredTableNames; + } + public void setIndexIntermediateFile(String indexIntermediateFile) { this.indexIntermediateFile = indexIntermediateFile; } diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java index f1ee95d..6d635ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java @@ -20,6 +20,7 @@ package org.apache.hadoop.hive.ql.index; import java.io.IOException; import java.util.ArrayList; +import java.util.List; import java.util.Iterator; import java.util.Set; @@ -95,6 +96,31 @@ public class HiveIndexedInputFormat extends HiveInputFormat { public InputSplit[] getSplits(JobConf job, int numSplits) throws IOException { String indexFileStr = job.get(indexFile); l4j.info("index_file is " + indexFileStr); + String filteredPathString = job.get("hive.index.blockfilter.tables"); + Path[] dirs = FileInputFormat.getInputPaths(job); + List unfilteredPaths = new ArrayList(); + boolean filteredInput = true; + if (filteredPathString != null) { + String[] filteredPaths = filteredPathString.split(","); + if (dirs != null) { + for (Path p : dirs) { + boolean matched = false; + for (String fp : filteredPaths) { + if (p.toUri().toString().equals(fp)) { + matched = true; + break; + } + } + if (!matched) { + unfilteredPaths.add(p); + filteredInput = false; + } + } + } + } + + if (!filteredInput) + return super.getSplits(job, numSplits); HiveIndexResult hiveIndexResult = null; if (indexFileStr != null) { @@ -121,7 +147,7 @@ public class HiveIndexedInputFormat extends HiveInputFormat { } newInputPaths.append(path); } FileInputFormat.setInputPaths(job, newInputPaths.toString()); } else { return 
super.getSplits(job, numSplits); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java index 61bbbf5..a12d521 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java @@ -90,10 +90,12 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { qlCommand.append("(SELECT `_bucketname` AS bucketname , `_offset` AS offset FROM "); List iqs = new ArrayList(indexes.size()); + List origTableNames = new ArrayList(indexes.size()); int i = 0; for (Index index : indexes) { ExprNodeDesc indexPredicate = indexPredicates.get(index); if (indexPredicate != null) { + origTableNames.add(index.getOrigTableName()); iqs.add(new BitmapInnerQuery( index.getIndexTableName(), indexPredicate, @@ -114,7 +116,10 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { driver.compile(qlCommand.toString(), false); // setup TableScanOperator to change input format for original query - queryContext.setIndexInputFormat(HiveIndexedInputFormat.class.getName()); + queryContext.setIndexInputFormat( + HiveIndexedInputFormat.class.getName(), + origTableNames + ); queryContext.setIndexIntermediateFile(tmpFile); queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 7c91946..60a52ea 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -180,7 +180,12 @@ public class CompactIndexHandler extends TableBasedIndexHandler { driver.compile(qlCommand.toString(), false); // setup TableScanOperator to change input format for original query - 
queryContext.setIndexInputFormat(HiveCompactIndexInputFormat.class.getName()); + List origTableNames = new ArrayList(); + origTableNames.add(index.getOrigTableName()); + queryContext.setIndexInputFormat( + HiveCompactIndexInputFormat.class.getName(), + origTableNames + ); queryContext.setIndexIntermediateFile(tmpFile); queryContext.addAdditionalSemanticInputs(driver.getPlan().getInputs()); diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java index dbc489f..731e62b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/index/IndexWhereProcessor.java @@ -167,6 +167,17 @@ public class IndexWhereProcessor implements NodeProcessor { // prepare the map reduce job to use indexing MapredWork work = currentTask.getWork(); + List filteredTableNames = queryContext.getFilteredTables(); + StringBuilder sb = new StringBuilder(); + for (String tableName : filteredTableNames) { + for (Partition p : queryPartitions) { + if (p.getTable().toString().equals(tableName)) { + sb.append(p.getTable().getPath().toUri().toString()); + sb.append(","); + } + } + } + work.setIndexedPaths(sb.toString()); work.setInputformat(queryContext.getIndexInputFormat()); work.setIndexIntermediateFile(queryContext.getIndexIntermediateFile()); @@ -259,6 +270,9 @@ public class IndexWhereProcessor implements NodeProcessor { private List getIndexTables(Hive hive, Partition part) throws HiveException { List
indexTables = new ArrayList
(); Table partitionedTable = part.getTable(); + if (indexes == null || indexes.get(partitionedTable) == null) { + return indexTables; + } for (Index index : indexes.get(partitionedTable)) { indexTables.add(hive.getTable(index.getIndexTableName())); } @@ -276,6 +290,10 @@ public class IndexWhereProcessor implements NodeProcessor { return true; // empty specs come from non-partitioned tables } + if (indexTables == null || indexTables.size() == 0) { + return false; + } + for (Table indexTable : indexTables) { // get partitions that match the spec List matchingPartitions = hive.getPartitions(indexTable, partSpec); diff --git ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java index a03a9a6..19a7590 100644 --- ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java +++ ql/src/java/org/apache/hadoop/hive/ql/plan/MapredWork.java @@ -55,6 +55,8 @@ public class MapredWork implements Serializable { private HashMap nameToSplitSample; + private String filteredPaths; + // map<->reduce interface // schema of the map-reduce 'key' object - this is homogeneous private TableDesc keyDesc; @@ -387,10 +389,18 @@ public class MapredWork implements Serializable { return indexIntermediateFile; } + public String getIndexedPaths() { + return this.filteredPaths; + } + public void setIndexIntermediateFile(String fileName) { this.indexIntermediateFile = fileName; } + public void setIndexedPaths(String filteredPaths) { + this.filteredPaths = filteredPaths; + } + public void setGatheringStats(boolean gatherStats) { this.gatheringStats = gatherStats; } diff --git ql/src/test/queries/clientpositive/index_auto_mult_tables.q ql/src/test/queries/clientpositive/index_auto_mult_tables.q new file mode 100644 index 0000000..3b4364a --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_mult_tables.q @@ -0,0 +1,20 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing 
+EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100; + + +CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX src_index ON src REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100; +SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100; + +DROP INDEX src_index on src; +DROP INDEX srcpart_index on src; diff --git ql/src/test/results/clientpositive/index_auto_mult_tables.q.out ql/src/test/results/clientpositive/index_auto_mult_tables.q.out new file mode 100644 index 0000000..62e24a4 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_mult_tables.q.out @@ -0,0 +1,605 @@ +PREHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +PREHOOK: type: QUERY +POSTHOOK: query: -- try the query without indexing, with manual indexing, and with automatic indexing + +-- without indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +POSTHOOK: type: QUERY +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. 
(TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 80)) (< (. (TOK_TABLE_OR_COL b) key) 100))))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 80)) and (_col4 < 100)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + 
Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-30_17-10-30_990_5618599262181037701/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-30_17-10-30_990_5618599262181037701/-mr-10000 +82 val_82 +82 val_82 +82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 
+90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +PREHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src_index ON TABLE src(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX src_index ON src REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@src +PREHOOK: Output: default@default__src_src_index__ +POSTHOOK: query: ALTER INDEX src_index ON src REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@src +POSTHOOK: Output: default@default__src_src_index__ +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +PREHOOK: type: QUERY +POSTHOOK: query: -- automatic indexing +EXPLAIN SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND 
b.key > 80 AND b.key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_JOIN (TOK_TABREF (TOK_TABNAME src) a) (TOK_TABREF (TOK_TABNAME srcpart) b) (= (. (TOK_TABLE_OR_COL a) key) (. (TOK_TABLE_OR_COL b) key)))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) key)) (TOK_SELEXPR (. (TOK_TABLE_OR_COL a) value))) (TOK_WHERE (AND (AND (AND (> (. (TOK_TABLE_OR_COL a) key) 80) (< (. (TOK_TABLE_OR_COL a) key) 100)) (> (. (TOK_TABLE_OR_COL b) key) 80)) (< (. (TOK_TABLE_OR_COL b) key) 100))))) + +STAGE DEPENDENCIES: + Stage-4 is a root stage + Stage-7 depends on stages: Stage-4 , consists of Stage-6, Stage-5 + Stage-6 + Stage-3 depends on stages: Stage-6, Stage-5 + Stage-1 depends on stages: Stage-3 + Stage-5 + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-4 + Map Reduce + Alias -> Map Operator Tree: + default__src_src_index__ + TableScan + alias: default__src_src_index__ + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Select Operator + expressions: + expr: _bucketname + type: string + expr: _offsets + type: array + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 1 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-7 + Conditional Operator + + Stage: 
Stage-6 + Move Operator + files: + hdfs directory: true + destination: file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-06-30_17-10-49_107_3095811586655937883/-ext-10000 + + Stage: Stage-3 + Move Operator + files: + hdfs directory: true + destination: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-30_17-10-48_693_7302808354239007888/-mr-10002 + + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + a + TableScan + alias: a + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 0 + value expressions: + expr: key + type: string + expr: value + type: string + b + TableScan + alias: b + filterExpr: + expr: ((key > 80) and (key < 100)) + type: boolean + Filter Operator + predicate: + expr: ((key > 80) and (key < 100)) + type: boolean + Reduce Output Operator + key expressions: + expr: key + type: string + sort order: + + Map-reduce partition columns: + expr: key + type: string + tag: 1 + value expressions: + expr: key + type: string + Reduce Operator Tree: + Join Operator + condition map: + Inner Join 0 to 1 + condition expressions: + 0 {VALUE._col0} {VALUE._col1} + 1 {VALUE._col0} + handleSkewJoin: false + outputColumnNames: _col0, _col1, _col4 + Filter Operator + predicate: + expr: ((((_col0 > 80) and (_col0 < 100)) and (_col4 > 80)) and (_col4 < 100)) + type: boolean + Select Operator + expressions: + expr: _col0 + type: string + expr: _col1 + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-5 + Map Reduce + Alias -> Map Operator Tree: + 
file:/Users/salbiz/dev/hive/build/ql/scratchdir/hive_2011-06-30_17-10-49_107_3095811586655937883/-ext-10001 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__src_src_index__ +PREHOOK: Input: default@src +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +PREHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-30_17-10-49_243_6215797490113851033/-mr-10000 +POSTHOOK: query: SELECT a.key, a.value FROM src a JOIN srcpart b ON (a.key = b.key) WHERE a.key > 80 AND a.key < 100 AND b.key > 80 AND b.key < 100 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__src_src_index__ +POSTHOOK: Input: default@src +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=11 +POSTHOOK: Input: default@srcpart@ds=2008-04-09/hr=12 +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-06-30_17-10-49_243_6215797490113851033/-mr-10000 +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +82 val_82 +82 val_82 
+82 val_82 +82 val_82 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +83 val_83 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +84 val_84 +85 val_85 +85 val_85 +85 val_85 +85 val_85 +86 val_86 +86 val_86 +86 val_86 +86 val_86 +87 val_87 +87 val_87 +87 val_87 +87 val_87 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +90 val_90 +92 val_92 +92 val_92 +92 val_92 +92 val_92 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +95 val_95 +96 val_96 +96 val_96 +96 val_96 +96 val_96 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +97 val_97 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +98 val_98 +PREHOOK: query: DROP INDEX src_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX src_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key 
SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] +PREHOOK: query: DROP INDEX srcpart_index on src +PREHOOK: type: DROPINDEX +POSTHOOK: query: DROP INDEX srcpart_index on src +POSTHOOK: type: DROPINDEX +POSTHOOK: Lineage: default__src_src_index__._bucketname SIMPLE [(src)src.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__src_src_index__._offsets EXPRESSION [(src)src.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__src_src_index__.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ] -- 1.7.4.4