diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 63110bb..3e3fe33 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -308,6 +308,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { */ private transient PrimitiveObjectInspector inputOI; private transient PrimitiveObjectInspector numVectorsOI; + private final static int MAX_BIT_VECTORS = 1024; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". @@ -477,6 +478,10 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep if (!emptyTable) { numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); } + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + + " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } initNDVEstimator(myagg, numVectors); myagg.firstItem = false; myagg.numBitVectors = numVectors; @@ -604,6 +609,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { */ private transient PrimitiveObjectInspector inputOI; private transient PrimitiveObjectInspector numVectorsOI; + private final static int MAX_BIT_VECTORS = 1024; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". @@ -773,6 +779,12 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep if (!emptyTable) { numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); } + + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + + " is " + MAX_BIT_VECTORS + ", but was passed " + numVectors + " bit vectors"); + } + initNDVEstimator(myagg, numVectors); myagg.firstItem = false; myagg.numBitVectors = numVectors; @@ -901,6 +913,7 @@ public Object terminate(AggregationBuffer agg) throws HiveException { */ private transient PrimitiveObjectInspector inputOI; private transient PrimitiveObjectInspector numVectorsOI; + private final static int MAX_BIT_VECTORS = 1024; /* Partial aggregation result returned by TerminatePartial. Partial result is a struct * containing a long field named "count". @@ -1081,6 +1094,12 @@ public void iterate(AggregationBuffer agg, Object[] parameters) throws HiveExcep if (!emptyTable) { numVectors = PrimitiveObjectInspectorUtils.getInt(parameters[1], numVectorsOI); } + + if (numVectors > MAX_BIT_VECTORS) { + throw new HiveException("The maximum allowed value for number of bit vectors " + + " is " + MAX_BIT_VECTORS + " , but was passed " + numVectors + " bit vectors"); + } + initNDVEstimator(myagg, numVectors); myagg.firstItem = false; myagg.numBitVectors = numVectors; diff --git ql/src/test/queries/clientnegative/compute_stats_long.q ql/src/test/queries/clientnegative/compute_stats_long.q new file mode 100644 index 0000000..e24f417 --- /dev/null +++ ql/src/test/queries/clientnegative/compute_stats_long.q @@ -0,0 +1,7 @@ +create table tab_int(a int); + +-- insert some data +LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int; + +-- compute stats should raise an error since the number of bit vectors > 1024 +select compute_stats(a, 10000) from tab_int; diff --git ql/src/test/results/clientnegative/compute_stats_long.q.out ql/src/test/results/clientnegative/compute_stats_long.q.out new file mode 100644 index 0000000..f4ad813 --- /dev/null +++ ql/src/test/results/clientnegative/compute_stats_long.q.out @@ -0,0 +1,29 @@ +PREHOOK: query: create table tab_int(a int) +PREHOOK: type: CREATETABLE +POSTHOOK: query: create table tab_int(a int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: default@tab_int +PREHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int +PREHOOK: type: LOAD +PREHOOK: Output: default@tab_int +POSTHOOK: query: -- insert some data +LOAD DATA LOCAL INPATH "../data/files/int.txt" INTO TABLE tab_int +POSTHOOK: type: LOAD +POSTHOOK: Output: default@tab_int +PREHOOK: query: -- compute stats should raise an error since the number of bit vectors > 1024 +select compute_stats(a, 10000) from tab_int +PREHOOK: type: QUERY +PREHOOK: Input: default@tab_int +#### A masked pattern was here #### +Execution failed with exit status: 2 +Obtaining error information + +Task failed! +Task ID: + Stage-1 + +Logs: + +#### A masked pattern was here #### +FAILED: Execution Error, return code 2 from org.apache.hadoop.hive.ql.exec.mr.MapRedTask