diff --git pom.xml pom.xml index 0e30078..1ee7977 100644 --- pom.xml +++ pom.xml @@ -126,7 +126,7 @@ 4.2.5 4.2.5 1.9.2 - 0.3.2 + 0.9.2 5.5.1 3.0.1 7.6.0.v20120127 diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java index 58ea3ba..5b615d3 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFEWAHBitmapBop.java @@ -18,17 +18,11 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.io.IOException; -import java.util.ArrayList; -import java.util.List; - -import javaewah.EWAHCompressedBitmap; +import com.googlecode.javaewah.EWAHCompressedBitmap; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectInput; -import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectOutput; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -37,15 +31,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.io.LongWritable; /** * An abstract class for a UDF that performs a binary operation between two EWAH-compressed bitmaps. * For example: Bitmap OR and AND operations between two EWAH-compressed bitmaps. */ abstract public class AbstractGenericUDFEWAHBitmapBop extends GenericUDF { - protected final ArrayList ret = new ArrayList(); - private transient ObjectInspector b1OI; + + private transient ListObjectInspector b1OI; + private transient ListObjectInspector b2OI; private final String name; AbstractGenericUDFEWAHBitmapBop(String name) { @@ -58,26 +52,29 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumen throw new UDFArgumentLengthException( "The function " + name + "(b1, b2) takes exactly 2 arguments"); } + b1OI = getPrimitiveListOI(arguments[0], name); + b2OI = getPrimitiveListOI(arguments[1], name); + + return ObjectInspectorFactory + .getStandardListObjectInspector(PrimitiveObjectInspectorFactory + .javaLongObjectInspector); + } - if (arguments[0].getCategory().equals(Category.LIST)) { - b1OI = (ListObjectInspector) arguments[0]; - } else { + static ListObjectInspector getPrimitiveListOI(ObjectInspector argument, String name) + throws UDFArgumentTypeException { + if (!argument.getCategory().equals(Category.LIST)) { throw new UDFArgumentTypeException(0, "\"" + Category.LIST.toString().toLowerCase() + "\" is expected at function " + name + ", but \"" - + arguments[0].getTypeName() + "\" is found"); + + argument.getTypeName() + "\" is found"); } - - if (!arguments[1].getCategory().equals(Category.LIST)) { - throw new UDFArgumentTypeException(1, "\"" - + Category.LIST.toString().toLowerCase() - + "\" is expected at function " + name + ", but \"" - + arguments[1].getTypeName() + "\" is found"); - + ListObjectInspector b1OI = (ListObjectInspector) argument; + if (!(b1OI.getListElementObjectInspector() instanceof PrimitiveObjectInspector)) { + throw new UDFArgumentTypeException(0, "\"" + + "Primitive type elements are expected at function " + name + ", but \"" + + b1OI.getListElementObjectInspector().getTypeName() + "\" is found"); } - return ObjectInspectorFactory - .getStandardListObjectInspector(PrimitiveObjectInspectorFactory - .writableLongObjectInspector); + return b1OI; } protected abstract EWAHCompressedBitmap bitmapBop( @@ -89,68 +86,42 @@ public Object evaluate(DeferredObject[] arguments) throws HiveException { Object b1 = arguments[0].get(); Object b2 = arguments[1].get(); - EWAHCompressedBitmap bitmap1 = wordArrayToBitmap(b1); - EWAHCompressedBitmap bitmap2 = wordArrayToBitmap(b2); + EWAHCompressedBitmap bitmap1 = wordArrayToBitmap(b1, b1OI); + EWAHCompressedBitmap bitmap2 = wordArrayToBitmap(b2, b2OI); EWAHCompressedBitmap bitmapAnd = bitmapBop(bitmap1, bitmap2); - BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput(); - try { - bitmapAnd.writeExternal(bitmapObjOut); - } catch (IOException e) { - throw new RuntimeException(e); - } - ret.clear(); - List retList = bitmapToWordArray(bitmapAnd); - for (LongWritable l : retList) { - ret.add(l); - } - return ret; + return bitmapToLongArray(bitmapAnd.toArray()); } - protected EWAHCompressedBitmap wordArrayToBitmap(Object b) { - ListObjectInspector lloi = (ListObjectInspector) b1OI; - int length = lloi.getListLength(b); - ArrayList bitmapArray = new ArrayList(); - for (int i = 0; i < length; i++) { - long l = PrimitiveObjectInspectorUtils.getLong( - lloi.getListElement(b, i), - (PrimitiveObjectInspector) lloi.getListElementObjectInspector()); - bitmapArray.add(new LongWritable(l)); + static EWAHCompressedBitmap wordArrayToBitmap(Object b, ListObjectInspector loi) { + int[] bitmapArray = new int[loi.getListLength(b)]; + for (int i = 0; i < bitmapArray.length; i++) { + bitmapArray[i] = PrimitiveObjectInspectorUtils.getInt( + loi.getListElement(b, i), + (PrimitiveObjectInspector) loi.getListElementObjectInspector()); } + return EWAHCompressedBitmap.bitmapOf(bitmapArray); + } - BitmapObjectInput bitmapObjIn = new BitmapObjectInput(bitmapArray); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - try { - bitmap.readExternal(bitmapObjIn); - } catch (IOException e) { - throw new RuntimeException(e); + static Integer[] bitmapToIntArray(int[] indices) { + Integer[] array = new Integer[indices.length]; + for (int i = 0; i < indices.length; i++) { + array[i] = indices[i]; } - return bitmap; + return array; } - protected List bitmapToWordArray(EWAHCompressedBitmap bitmap) { - BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput(); - try { - bitmap.writeExternal(bitmapObjOut); - } catch (IOException e) { - throw new RuntimeException(e); + static Long[] bitmapToLongArray(int[] indices) { + Long[] array = new Long[indices.length]; + for (int i = 0; i < indices.length; i++) { + array[i] = Long.valueOf(indices[i]); } - return bitmapObjOut.list(); + return array; } @Override public String getDisplayString(String[] children) { - StringBuilder sb = new StringBuilder(); - sb.append(name); - sb.append("("); - for (int i = 0; i < children.length; i++) { - sb.append(children[i]); - if (i + 1 != children.length) { - sb.append(","); - } - } - sb.append(")"); - return sb.toString(); + return getDisplayString(name, children); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java index e4b412e..8929a54 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFEWAHBitmap.java @@ -17,17 +17,12 @@ */ package org.apache.hadoop.hive.ql.udf.generic; -import java.io.IOException; -import java.util.ArrayList; - -import javaewah.EWAHCompressedBitmap; +import com.googlecode.javaewah.EWAHCompressedBitmap; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectInput; -import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectOutput; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.ql.parse.SemanticException; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -39,7 +34,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils; -import org.apache.hadoop.io.LongWritable; import org.apache.hadoop.util.StringUtils; @@ -76,7 +70,6 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) // For PARTIAL2 and FINAL: ObjectInspectors for partial aggregations // (lists of bitmaps) - private transient StandardListObjectInspector loi; private transient StandardListObjectInspector internalMergeOI; @Override @@ -85,22 +78,26 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) super.init(m, parameters); // init output object inspectors // The output of a partial aggregation is a list - if (m == Mode.PARTIAL1) { - inputOI = (PrimitiveObjectInspector) parameters[0]; - return ObjectInspectorFactory - .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - } else if (m == Mode.PARTIAL2 || m == Mode.FINAL) { - internalMergeOI = (StandardListObjectInspector) parameters[0]; - inputOI = PrimitiveObjectInspectorFactory.writableByteObjectInspector; - loi = (StandardListObjectInspector) ObjectInspectorFactory - .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - return loi; - } else { // Mode.COMPLETE, ie. no map-side aggregation, requires ordering - inputOI = PrimitiveObjectInspectorFactory.writableByteObjectInspector; - loi = (StandardListObjectInspector) ObjectInspectorFactory - .getStandardListObjectInspector(PrimitiveObjectInspectorFactory.writableLongObjectInspector); - return loi; + switch(m) { + case PARTIAL1: + inputOI = (PrimitiveObjectInspector) parameters[0]; + break; + case PARTIAL2: + case FINAL: + internalMergeOI = (StandardListObjectInspector) parameters[0]; + break; + case COMPLETE: + // no map-side aggregation, requires ordering + inputOI = (PrimitiveObjectInspector) parameters[0]; + break; + } + // index table is defined to accept long[] + if (m == Mode.COMPLETE || m == Mode.FINAL) { + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.javaLongObjectInspector); } + return ObjectInspectorFactory.getStandardListObjectInspector( + PrimitiveObjectInspectorFactory.javaIntObjectInspector); } /** class for storing the current partial result aggregation */ @@ -115,8 +112,7 @@ public int estimate() { @Override public void reset(AggregationBuffer agg) throws HiveException { - - ((BitmapAgg) agg).bitmap = new EWAHCompressedBitmap(); + ((BitmapAgg) agg).bitmap = new EWAHCompressedBitmap(); } @Override @@ -146,42 +142,23 @@ public void iterate(AggregationBuffer agg, Object[] parameters) @Override public Object terminate(AggregationBuffer agg) throws HiveException { - BitmapAgg myagg = (BitmapAgg) agg; - - BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput(); - try { - myagg.bitmap.writeExternal(bitmapObjOut); - } catch (IOException e) { - throw new RuntimeException(e); - } - return bitmapObjOut.list(); + BitmapAgg myagg = (BitmapAgg) agg; + return AbstractGenericUDFEWAHBitmapBop.bitmapToLongArray(myagg.bitmap.toArray()); } @Override public void merge(AggregationBuffer agg, Object partial) throws HiveException { BitmapAgg myagg = (BitmapAgg) agg; - ArrayList partialResult = (ArrayList) internalMergeOI.getList(partial); - BitmapObjectInput bitmapObjIn = new BitmapObjectInput(partialResult); - EWAHCompressedBitmap partialBitmap = new EWAHCompressedBitmap(); - try { - partialBitmap.readExternal(bitmapObjIn); - } catch (IOException e) { - throw new RuntimeException(e); - } - myagg.bitmap = myagg.bitmap.or(partialBitmap); + EWAHCompressedBitmap bitmap = + AbstractGenericUDFEWAHBitmapBop.wordArrayToBitmap(partial, internalMergeOI); + myagg.bitmap = myagg.bitmap.or(bitmap); } @Override public Object terminatePartial(AggregationBuffer agg) throws HiveException { BitmapAgg myagg = (BitmapAgg) agg; - BitmapObjectOutput bitmapObjOut = new BitmapObjectOutput(); - try { - myagg.bitmap.writeExternal(bitmapObjOut); - } catch (IOException e) { - throw new RuntimeException(e); - } - return bitmapObjOut.list(); + return AbstractGenericUDFEWAHBitmapBop.bitmapToIntArray(myagg.bitmap.toArray()); } private void addBitmap(int newRow, BitmapAgg myagg) { diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index e3fb558..20eb82d 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -181,6 +181,20 @@ public abstract Object evaluate(DeferredObject[] arguments) */ public abstract String getDisplayString(String[] children); + protected String getDisplayString(String name, String[] children) { + StringBuilder sb = new StringBuilder(); + sb.append(name); + sb.append("("); + for (int i = 0; i < children.length; i++) { + sb.append(children[i]); + if (i + 1 != children.length) { + sb.append(","); + } + } + sb.append(")"); + return sb.toString(); + } + /** * Close GenericUDF. * This is only called in runtime of MapRedTask. diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java index 7838b54..b942988 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapAnd.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.udf.generic; -import javaewah.EWAHCompressedBitmap; +import com.googlecode.javaewah.EWAHCompressedBitmap; import org.apache.hadoop.hive.ql.exec.Description; diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java index 4a14a65..6d9f659 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapEmpty.java @@ -18,92 +18,41 @@ package org.apache.hadoop.hive.ql.udf.generic; -import java.io.IOException; -import java.util.ArrayList; - -import javaewah.EWAHCompressedBitmap; - import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; -import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; -import org.apache.hadoop.hive.ql.index.bitmap.BitmapObjectInput; import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; -import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.BooleanObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; -import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; -import org.apache.hadoop.io.BooleanWritable; -import org.apache.hadoop.io.LongWritable; @Description(name = "ewah_bitmap_empty", value = "_FUNC_(bitmap) - " + "Predicate that tests whether an EWAH-compressed bitmap is all zeros ") public class GenericUDFEWAHBitmapEmpty extends GenericUDF { - private transient ObjectInspector bitmapOI; - private transient BooleanObjectInspector boolOI; - -@Override -public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { - if (arguments.length != 1) { - throw new UDFArgumentLengthException( - "The function EWAH_BITMAP_EMPTY(b) takes exactly 1 argument"); - } + + private static final String name = "EWAH_BITMAP_EMPTY"; + + private transient ListObjectInspector bitmapOI; - if (arguments[0].getCategory().equals(Category.LIST)) { - bitmapOI = (ListObjectInspector) arguments[0]; - } else { - throw new UDFArgumentTypeException(0, "\"" - + Category.LIST.toString().toLowerCase() - + "\" is expected at function EWAH_BITMAP_EMPTY, but \"" - + arguments[0].getTypeName() + "\" is found"); - } - - boolOI = PrimitiveObjectInspectorFactory.writableBooleanObjectInspector; - return boolOI; + @Override + public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { + if (arguments.length != 1) { + throw new UDFArgumentLengthException( + "The function EWAH_BITMAP_EMPTY(b) takes exactly 1 argument"); + } + bitmapOI = AbstractGenericUDFEWAHBitmapBop.getPrimitiveListOI(arguments[0], name); + return PrimitiveObjectInspectorFactory.javaBooleanObjectInspector; } @Override public Object evaluate(DeferredObject[] arguments) throws HiveException { assert (arguments.length == 1); Object b = arguments[0].get(); - - ListObjectInspector lloi = (ListObjectInspector) bitmapOI; - int length = lloi.getListLength(b); - ArrayList bitmapArray = new ArrayList(); - for (int i = 0; i < length; i++) { - long l = PrimitiveObjectInspectorUtils.getLong( - lloi.getListElement(b, i), - (PrimitiveObjectInspector) lloi.getListElementObjectInspector()); - bitmapArray.add(new LongWritable(l)); - } - - BitmapObjectInput bitmapObjIn = new BitmapObjectInput(bitmapArray); - EWAHCompressedBitmap bitmap = new EWAHCompressedBitmap(); - try { - bitmap.readExternal(bitmapObjIn); - } catch (IOException e) { - throw new RuntimeException(e); - } - - // Add return true only if bitmap is all zeros. - return new BooleanWritable(!bitmap.iterator().hasNext()); + return AbstractGenericUDFEWAHBitmapBop.wordArrayToBitmap(b, bitmapOI).isEmpty(); } - @Override public String getDisplayString(String[] children) { - StringBuilder sb = new StringBuilder(); - sb.append("EWAH_BITMAP_EMPTY("); - for (int i = 0; i < children.length; i++) { - sb.append(children[i]); - if (i + 1 != children.length) { - sb.append(","); - } - } - sb.append(")"); - return sb.toString(); + return getDisplayString(name, children); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java index d438f82..47e9447 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFEWAHBitmapOr.java @@ -18,7 +18,7 @@ package org.apache.hadoop.hive.ql.udf.generic; -import javaewah.EWAHCompressedBitmap; +import com.googlecode.javaewah.EWAHCompressedBitmap; import org.apache.hadoop.hive.ql.exec.Description; diff --git ql/src/test/queries/clientpositive/index_bitmap2.q ql/src/test/queries/clientpositive/index_bitmap2.q index 89fbe76..208fda5 100644 --- ql/src/test/queries/clientpositive/index_bitmap2.q +++ ql/src/test/queries/clientpositive/index_bitmap2.q @@ -23,7 +23,7 @@ SELECT t.bucketname as `_bucketname`, COLLECT_SET(t.offset) AS `_offsets` FROM WHERE key = 0 AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`) UNION ALL SELECT `_bucketname` AS bucketname, `_offset` AS offset FROM default__src_src2_index__ - WHERE value = "val2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t + WHERE value = "val_2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t GROUP BY t.bucketname; SET hive.index.blockfilter.file=${system:test.tmp.dir}/index_result; diff --git ql/src/test/queries/clientpositive/udf_bitmap_empty.q ql/src/test/queries/clientpositive/udf_bitmap_empty.q index 142b248..637f542 100644 --- ql/src/test/queries/clientpositive/udf_bitmap_empty.q +++ ql/src/test/queries/clientpositive/udf_bitmap_empty.q @@ -1,5 +1,5 @@ set hive.fetch.task.conversion=more; -select ewah_bitmap_empty(array(13,2,4,8589934592,0,0)) from src tablesample (1 rows); +select ewah_bitmap_empty(array()) from src tablesample (1 rows); select ewah_bitmap_empty(array(13,2,4,8589934592,4096,0)) from src tablesample (1 rows); diff --git ql/src/test/results/clientpositive/index_bitmap2.q.out ql/src/test/results/clientpositive/index_bitmap2.q.out index 73c5b90..f006070 100644 --- ql/src/test/results/clientpositive/index_bitmap2.q.out +++ ql/src/test/results/clientpositive/index_bitmap2.q.out @@ -86,7 +86,7 @@ SELECT t.bucketname as `_bucketname`, COLLECT_SET(t.offset) AS `_offsets` FROM WHERE key = 0 AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`) UNION ALL SELECT `_bucketname` AS bucketname, `_offset` AS offset FROM default__src_src2_index__ - WHERE value = "val2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t + WHERE value = "val_2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t GROUP BY t.bucketname PREHOOK: type: QUERY PREHOOK: Input: default@default__src_src1_index__ @@ -98,7 +98,7 @@ SELECT t.bucketname as `_bucketname`, COLLECT_SET(t.offset) AS `_offsets` FROM WHERE key = 0 AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`) UNION ALL SELECT `_bucketname` AS bucketname, `_offset` AS offset FROM default__src_src2_index__ - WHERE value = "val2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t + WHERE value = "val_2" AND NOT EWAH_BITMAP_EMPTY(`_bitmaps`)) t GROUP BY t.bucketname POSTHOOK: type: QUERY POSTHOOK: Input: default@default__src_src1_index__ diff --git ql/src/test/results/clientpositive/index_bitmap3.q.out ql/src/test/results/clientpositive/index_bitmap3.q.out index 599bf3a..1d327d0 100644 --- ql/src/test/results/clientpositive/index_bitmap3.q.out +++ ql/src/test/results/clientpositive/index_bitmap3.q.out @@ -113,35 +113,35 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default__src_src1_index__ - Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 36811 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) and _bucketname is not null) and _offset is not null) (type: boolean) - Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4638 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4638 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) TableScan alias: default__src_src2_index__ - Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 38811 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((value = 'val_0') and _bucketname is not null) and _offset is not null) (type: boolean) - Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4890 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4890 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4890 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) Reduce Operator Tree: Join Operator @@ -151,20 +151,20 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: bigint) 1 _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col5 - Statistics: Num rows: 69 Data size: 6418 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69 Data size: 5101 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: collect_set(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -180,7 +180,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator @@ -188,14 +188,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 1256 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 1256 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 1256 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/index_bitmap_auto.q.out ql/src/test/results/clientpositive/index_bitmap_auto.q.out index 81c1795..10092be 100644 --- ql/src/test/results/clientpositive/index_bitmap_auto.q.out +++ ql/src/test/results/clientpositive/index_bitmap_auto.q.out @@ -132,35 +132,35 @@ STAGE PLANS: Map Operator Tree: TableScan alias: default__src_src1_index__ - Statistics: Num rows: 500 Data size: 46311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 36811 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((key = 0) and _bucketname is not null) and _offset is not null) (type: boolean) - Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4638 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4638 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 63 Data size: 5835 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4638 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) TableScan alias: default__src_src2_index__ - Statistics: Num rows: 500 Data size: 48311 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 500 Data size: 38811 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (((value = 'val_0') and _bucketname is not null) and _offset is not null) (type: boolean) - Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4890 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _bucketname (type: string), _offset (type: bigint), _bitmaps (type: array) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4890 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: bigint) sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: bigint) - Statistics: Num rows: 63 Data size: 6087 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 63 Data size: 4890 Basic stats: COMPLETE Column stats: NONE value expressions: _col2 (type: array) Reduce Operator Tree: Join Operator @@ -170,20 +170,20 @@ STAGE PLANS: 0 _col0 (type: string), _col1 (type: bigint) 1 _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col5 - Statistics: Num rows: 69 Data size: 6418 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 69 Data size: 5101 Basic stats: COMPLETE Column stats: NONE Filter Operator predicate: (not EWAH_BITMAP_EMPTY(EWAH_BITMAP_AND(_col2,_col5))) (type: boolean) - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: bigint) outputColumnNames: _col0, _col1 - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE Group By Operator aggregations: collect_set(_col1) keys: _col0 (type: string) mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false table: @@ -199,7 +199,7 @@ STAGE PLANS: key expressions: _col0 (type: string) sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 35 Data size: 3255 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 35 Data size: 2587 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator @@ -207,14 +207,14 @@ STAGE PLANS: keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 1256 Basic stats: COMPLETE Column stats: NONE Select Operator expressions: _col0 (type: string), _col1 (type: array) outputColumnNames: _col0, _col1 - Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 1256 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 17 Data size: 1581 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 17 Data size: 1256 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/udf_bitmap_and.q.out ql/src/test/results/clientpositive/udf_bitmap_and.q.out index 8c93398..520d8b1 100644 --- ql/src/test/results/clientpositive/udf_bitmap_and.q.out +++ ql/src/test/results/clientpositive/udf_bitmap_and.q.out @@ -6,7 +6,7 @@ POSTHOOK: query: select ewah_bitmap_and(array(13,2,4,8589934592,4096,0), array(1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -[13,2,4,8589934592,4096,0] +[0,2,4,13,4096] PREHOOK: query: select ewah_bitmap_and(array(13,2,4,8589934592,4096,0), array(8,2,4,8589934592,128,0)) from src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -15,7 +15,7 @@ POSTHOOK: query: select ewah_bitmap_and(array(13,2,4,8589934592,4096,0), array(8 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -[13,1,4,2,0] +[0,2,4] PREHOOK: query: drop table bitmap_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table bitmap_test @@ -48,16 +48,16 @@ POSTHOOK: query: select ewah_bitmap_and(a,b) from bitmap_test POSTHOOK: type: QUERY POSTHOOK: Input: default@bitmap_test #### A masked pattern was here #### -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] -[13,1,4,2,0] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] +[0,2,4] PREHOOK: query: drop table bitmap_test PREHOOK: type: DROPTABLE PREHOOK: Input: default@bitmap_test diff --git ql/src/test/results/clientpositive/udf_bitmap_empty.q.out ql/src/test/results/clientpositive/udf_bitmap_empty.q.out index ca96e78..73a1816 100644 --- ql/src/test/results/clientpositive/udf_bitmap_empty.q.out +++ ql/src/test/results/clientpositive/udf_bitmap_empty.q.out @@ -1,8 +1,8 @@ -PREHOOK: query: select ewah_bitmap_empty(array(13,2,4,8589934592,0,0)) from src tablesample (1 rows) +PREHOOK: query: select ewah_bitmap_empty(array()) from src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src #### A masked pattern was here #### -POSTHOOK: query: select ewah_bitmap_empty(array(13,2,4,8589934592,0,0)) from src tablesample (1 rows) +POSTHOOK: query: select ewah_bitmap_empty(array()) from src tablesample (1 rows) POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### diff --git ql/src/test/results/clientpositive/udf_bitmap_or.q.out ql/src/test/results/clientpositive/udf_bitmap_or.q.out index 43521da..9bfd9f9 100644 --- ql/src/test/results/clientpositive/udf_bitmap_or.q.out +++ ql/src/test/results/clientpositive/udf_bitmap_or.q.out @@ -6,7 +6,7 @@ POSTHOOK: query: select ewah_bitmap_or(array(13,2,4,8589934592,4096,0), array(13 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -[13,2,4,8589934592,4096,0] +[0,2,4,13,4096] PREHOOK: query: select ewah_bitmap_or(array(13,2,4,8589934592,4096,0), array(8,2,4,8589934592,128,0)) from src tablesample (1 rows) PREHOOK: type: QUERY PREHOOK: Input: default@src @@ -15,7 +15,7 @@ POSTHOOK: query: select ewah_bitmap_or(array(13,2,4,8589934592,4096,0), array(8, POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -[13,2,4,8589934592,4224,0] +[0,2,4,8,13,128,4096] PREHOOK: query: drop table bitmap_test PREHOOK: type: DROPTABLE POSTHOOK: query: drop table bitmap_test @@ -48,16 +48,16 @@ POSTHOOK: query: select ewah_bitmap_or(a,b) from bitmap_test POSTHOOK: type: QUERY POSTHOOK: Input: default@bitmap_test #### A masked pattern was here #### -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] -[13,2,4,8589934592,4224,0] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] +[0,2,4,8,13,128,4096] PREHOOK: query: drop table bitmap_test PREHOOK: type: DROPTABLE PREHOOK: Input: default@bitmap_test