From 914174579e0caa1c6a87c9faa99d2a368ccf74b0 Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Mon, 1 Aug 2011 19:09:56 -0700 Subject: [PATCH 1/1] Return empty splits when the index result is empty diff --git ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java index 1a1ecd7..c52624c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/HiveIndexedInputFormat.java @@ -124,6 +124,10 @@ public class HiveIndexedInputFormat extends HiveInputFormat { } Set inputFiles = hiveIndexResult.buckets.keySet(); + if (inputFiles == null || inputFiles.size() <= 0) { + // return empty splits if index results were empty + return new InputSplit[0]; + } Iterator iter = inputFiles.iterator(); while(iter.hasNext()) { String path = iter.next(); diff --git ql/src/test/queries/clientpositive/index_auto_empty.q ql/src/test/queries/clientpositive/index_auto_empty.q new file mode 100644 index 0000000..cb32162 --- /dev/null +++ ql/src/test/queries/clientpositive/index_auto_empty.q @@ -0,0 +1,21 @@ +-- Test to ensure that an empty index result is propagated correctly + +-- Create temp and leave it unpopulated so that the index built on it is empty. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE; + +-- Build an index on temp. 
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD; +ALTER INDEX temp_index ON temp REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SET hive.optimize.index.filter.compact.minsize=0; + +-- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86; +EXPLAIN SELECT * FROM temp WHERE key = 86; +SELECT * FROM temp WHERE key = 86; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=false; +DROP table temp; diff --git ql/src/test/results/clientpositive/index_auto_empty.q.out ql/src/test/results/clientpositive/index_auto_empty.q.out new file mode 100644 index 0000000..8e5a884 --- /dev/null +++ ql/src/test/results/clientpositive/index_auto_empty.q.out @@ -0,0 +1,110 @@ +PREHOOK: query: -- Test to ensure that an empty index result is propagated correctly + +-- Create temp and leave it unpopulated so that the index built on it is empty. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +PREHOOK: type: CREATETABLE +POSTHOOK: query: -- Test to ensure that an empty index result is propagated correctly + +-- Create temp and leave it unpopulated so that the index built on it is empty. +CREATE TABLE temp(key STRING, val STRING) STORED AS TEXTFILE +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@temp +PREHOOK: query: -- Build an index on temp. +CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: -- Build an index on temp. 
+CREATE INDEX temp_index ON TABLE temp(key) as 'COMPACT' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +PREHOOK: query: ALTER INDEX temp_index ON temp REBUILD +PREHOOK: type: QUERY +PREHOOK: Input: default@temp +PREHOOK: Output: default@default__temp_temp_index__ +POSTHOOK: query: ALTER INDEX temp_index ON temp REBUILD +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temp +POSTHOOK: Output: default@default__temp_temp_index__ +POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: -- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@default__temp_temp_index__ +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-02_16-40-09_452_4182570904364026850/-mr-10000 +POSTHOOK: query: -- query should not return any values +SELECT * FROM default__temp_temp_index__ WHERE key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@default__temp_temp_index__ +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-02_16-40-09_452_4182570904364026850/-mr-10000 +POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 +PREHOOK: type: QUERY 
+POSTHOOK: query: EXPLAIN SELECT * FROM temp WHERE key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] +ABSTRACT SYNTAX TREE: + (TOK_QUERY (TOK_FROM (TOK_TABREF (TOK_TABNAME temp))) (TOK_INSERT (TOK_DESTINATION (TOK_DIR TOK_TMP_FILE)) (TOK_SELECT (TOK_SELEXPR TOK_ALLCOLREF)) (TOK_WHERE (= (TOK_TABLE_OR_COL key) 86)))) + +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Alias -> Map Operator Tree: + temp + TableScan + alias: temp + filterExpr: + expr: (key = 86) + type: boolean + Filter Operator + predicate: + expr: (key = 86) + type: boolean + Select Operator + expressions: + expr: key + type: string + expr: val + type: string + outputColumnNames: _col0, _col1 + File Output Operator + compressed: false + GlobalTableId: 0 + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + + Stage: Stage-0 + Fetch Operator + limit: -1 + + +PREHOOK: query: SELECT * FROM temp WHERE key = 86 +PREHOOK: type: QUERY +PREHOOK: Input: default@temp +PREHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-02_16-40-15_355_7361467869618565705/-mr-10000 +POSTHOOK: query: SELECT * FROM temp WHERE key = 86 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@temp +POSTHOOK: Output: file:/var/folders/5V/5V4Zq77qGD4aSK9m8V3frVsFdRU/-Tmp-/salbiz/hive_2011-08-02_16-40-15_355_7361467869618565705/-mr-10000 +POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, 
type:string, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] +PREHOOK: query: DROP table temp +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@temp +PREHOOK: Output: default@temp +POSTHOOK: query: DROP table temp +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@temp +POSTHOOK: Output: default@temp +POSTHOOK: Lineage: default__temp_temp_index__._bucketname SIMPLE [(temp)temp.FieldSchema(name:INPUT__FILE__NAME, type:string, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__._offsets EXPRESSION [(temp)temp.FieldSchema(name:BLOCK__OFFSET__INSIDE__FILE, type:bigint, comment:), ] +POSTHOOK: Lineage: default__temp_temp_index__.key SIMPLE [(temp)temp.FieldSchema(name:key, type:string, comment:null), ] -- 1.7.4.4