From a3c03b07f919d1bc60ce2561acdab3a4ae349265 Mon Sep 17 00:00:00 2001 From: Syed Albiz Date: Sat, 30 Jul 2011 13:34:26 -0700 Subject: [PATCH 1/1] Reject bitmap index builds without map-side aggregation diff --git ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java index 02ab78c..4a77645 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/TableBasedIndexHandler.java @@ -101,7 +101,7 @@ public abstract class TableBasedIndexHandler extends AbstractIndexHandler { abstract protected Task getIndexBuilderMapRedTask(Set inputs, Set outputs, List indexField, boolean partitioned, PartitionDesc indexTblPartDesc, String indexTableName, - PartitionDesc baseTablePartDesc, String baseTableName, String dbName); + PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException; protected List getPartKVPairStringArray( LinkedHashMap partSpec) { diff --git ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java index 61bbbf5..50f4e8a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/bitmap/BitmapIndexHandler.java @@ -220,7 +220,7 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { protected Task getIndexBuilderMapRedTask(Set inputs, Set outputs, List indexField, boolean partitioned, PartitionDesc indexTblPartDesc, String indexTableName, - PartitionDesc baseTablePartDesc, String baseTableName, String dbName) { + PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException { HiveConf conf = new HiveConf(getConf(), BitmapIndexHandler.class); HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVEROWOFFSET, true); @@ -280,9 +280,9 @@ public class BitmapIndexHandler extends TableBasedIndexHandler { } // Require
clusterby ROWOFFSET if map-size aggregation is off. + // TODO: Make this work without map-side aggregation if (!conf.get("hive.map.aggr", null).equals("true")) { - command.append(" CLUSTER BY "); - command.append(VirtualColumn.ROWOFFSET.getName()); + throw new HiveException("Cannot construct index without map-side aggregation"); } Driver driver = new Driver(conf); diff --git ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java index 7c91946..19b875d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java +++ ql/src/java/org/apache/hadoop/hive/ql/index/compact/CompactIndexHandler.java @@ -81,7 +81,7 @@ public class CompactIndexHandler extends TableBasedIndexHandler { protected Task getIndexBuilderMapRedTask(Set inputs, Set outputs, List indexField, boolean partitioned, PartitionDesc indexTblPartDesc, String indexTableName, - PartitionDesc baseTablePartDesc, String baseTableName, String dbName) { + PartitionDesc baseTablePartDesc, String baseTableName, String dbName) throws HiveException { String indexCols = HiveUtils.getUnparsedColumnNamesFromFieldSchema(indexField); diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java index 0a3df09..d896b8e 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java @@ -92,14 +92,19 @@ public class GenericUDAFEWAHBitmap extends AbstractGenericUDAFResolver { inputOI = (PrimitiveObjectInspector) parameters[0]; return ObjectInspectorFactory .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - } else { - //no map aggregation + } else if (m == Mode.PARTIAL2 || m == Mode.FINAL) { internalMergeOI = (StandardListObjectInspector) parameters[0]; 
bitmapLongOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector; inputOI = PrimitiveObjectInspectorFactory.writableByteObjectInspector; loi = (StandardListObjectInspector) ObjectInspectorFactory .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); return loi; + } else { // Mode.COMPLETE, i.e. no map-side aggregation, requires ordering + bitmapLongOI = PrimitiveObjectInspectorFactory.writableLongObjectInspector; + inputOI = PrimitiveObjectInspectorFactory.writableByteObjectInspector; + loi = (StandardListObjectInspector) ObjectInspectorFactory + .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); + return loi; } } diff --git ql/src/test/queries/clientnegative/index_bitmap_no_map_aggr.q ql/src/test/queries/clientnegative/index_bitmap_no_map_aggr.q new file mode 100644 index 0000000..a17cd1f --- /dev/null +++ ql/src/test/queries/clientnegative/index_bitmap_no_map_aggr.q @@ -0,0 +1,7 @@ +EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.map.aggr=false; +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX src1_index ON src REBUILD; diff --git ql/src/test/queries/clientpositive/index_bitmap_auto_no_map_aggr.q ql/src/test/queries/clientpositive/index_bitmap_auto_no_map_aggr.q new file mode 100644 index 0000000..8581b5e --- /dev/null +++ ql/src/test/queries/clientpositive/index_bitmap_auto_no_map_aggr.q @@ -0,0 +1,53 @@ +-- try the query without indexing, with manual indexing, and with automatic indexing +-- without indexing +SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key; + +-- create indices +EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +EXPLAIN +CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD; + +SET 
hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.map.aggr=false; +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD; +CREATE INDEX src2_index ON TABLE src(value) as 'BITMAP' WITH DEFERRED REBUILD; +ALTER INDEX src1_index ON src REBUILD; +ALTER INDEX src2_index ON src REBUILD; +SELECT * FROM default__src_src1_index__ ORDER BY key; +SELECT * FROM default__src_src2_index__ ORDER BY value; + + +-- manual indexing +EXPLAIN +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname; + +INSERT OVERWRITE DIRECTORY "/tmp/index_result" +SELECT a.bucketname AS `_bucketname`, COLLECT_SET(a.offset) as `_offsets` +FROM (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src1_index__ + WHERE key = 0) a + JOIN + (SELECT `_bucketname` AS bucketname, `_offset` AS offset, `_bitmaps` AS bitmaps FROM default__src_src2_index__ + WHERE value = "val_0") b + ON + a.bucketname = b.bucketname AND a.offset = b.offset WHERE NOT +EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(a.bitmaps, b.bitmaps)) GROUP BY a.bucketname; + +SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key; + + +SET hive.input.format=org.apache.hadoop.hive.ql.io.HiveInputFormat; +SET hive.optimize.index.filter=true; +SELECT key, value FROM src WHERE key=0 AND value = "val_0" ORDER BY key; + +DROP INDEX src1_index ON src; +DROP INDEX src2_index ON src; + diff --git ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out 
ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out new file mode 100644 index 0000000..b49b499 --- /dev/null +++ ql/src/test/results/clientnegative/index_bitmap_no_map_aggr.q.out @@ -0,0 +1,21 @@ +PREHOOK: query: EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: EXPLAIN +CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +ABSTRACT SYNTAX TREE: + (TOK_CREATEINDEX src1_index 'BITMAP' (TOK_TABNAME src) (TOK_TABCOLNAME key) TOK_DEFERRED_REBUILDINDEX) + +STAGE DEPENDENCIES: + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-0 + + +PREHOOK: query: CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +PREHOOK: type: CREATEINDEX +POSTHOOK: query: CREATE INDEX src1_index ON TABLE src(key) as 'BITMAP' WITH DEFERRED REBUILD +POSTHOOK: type: CREATEINDEX +FAILED: Error in semantic analysis: org.apache.hadoop.hive.ql.parse.SemanticException: org.apache.hadoop.hive.ql.metadata.HiveException: Cannot construct index without map-side aggregation -- 1.7.4.4