diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 97609cf..d2c127b 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -934,7 +934,8 @@ minillaplocal.query.files=\
   partialdhj.q,\
   stats_date.q,\
   dst.q,\
-  q93_with_constraints.q
+  q93_with_constraints.q,\
+  approx_distinct.q
 
 encrypted.query.files=encryption_join_unencrypted_tbl.q,\
   encryption_insert_partition_static.q,\
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 0bc8d84..2c2f451 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -68,6 +68,7 @@
 import org.apache.hadoop.hive.ql.udf.UDFHex;
 import org.apache.hadoop.hive.ql.udf.UDFHour;
 import org.apache.hadoop.hive.ql.udf.UDFJson;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFApproximateDistinct;
 import org.apache.hadoop.hive.ql.udf.generic.GenericUDFLength;
 import org.apache.hadoop.hive.ql.udf.UDFLike;
 import org.apache.hadoop.hive.ql.udf.UDFLn;
@@ -465,6 +466,7 @@
 
     system.registerGenericUDAF("compute_stats", new GenericUDAFComputeStats());
     system.registerGenericUDAF("bloom_filter", new GenericUDAFBloomFilter());
+    system.registerGenericUDAF("approx_distinct", new GenericUDAFApproximateDistinct());
     system.registerUDAF("percentile", UDAFPercentile.class);
 
 
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java
new file mode 100644
index 0000000..f6e4759
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFApproximateDistinct.java
@@ -0,0 +1,278 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.ObjectOutputStream;
+
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLog;
+import org.apache.hadoop.hive.common.ndv.hll.HyperLogLogUtils;
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.common.type.HiveBaseChar;
+import org.apache.hadoop.hive.common.type.HiveChar;
+import org.apache.hadoop.hive.common.type.HiveDecimal;
+import org.apache.hadoop.hive.common.type.HiveIntervalDayTime;
+import org.apache.hadoop.hive.common.type.HiveVarchar;
+import org.apache.hadoop.hive.common.type.Timestamp;
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFEvaluator.AbstractAggregationBuffer;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorUtils;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableBinaryObjectInspector;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.LongWritable;
+
+/**
+ * Resolver for approx_distinct(x): an approximate count of the distinct non-null values
+ * of x, computed with a HyperLogLog sketch.
+ *
+ * Partial aggregations travel between stages as serialized sketches (BINARY); FINAL and
+ * COMPLETE return the sketch's cardinality estimate as a BIGINT.
+ */
+@Description(name = "approx_distinct", value = "_FUNC_(x) - generate an approximate distinct from input column")
+@SuppressWarnings("deprecation")
+public class GenericUDAFApproximateDistinct extends AbstractGenericUDAFResolver {
+
+  /** Aggregation buffer that owns a single HyperLogLog sketch. */
+  static final class HyperLogLogBuffer extends AbstractAggregationBuffer {
+    public HyperLogLog hll;
+
+    public HyperLogLogBuffer() {
+      this.reset();
+    }
+
+    @Override
+    public int estimate() {
+      return 4096; /* 4kb usually */
+    }
+
+    /** Discards the current sketch and starts a new empty one (12 register index bits). */
+    public void reset() {
+      hll = HyperLogLog.builder().setNumRegisterIndexBits(12).build();
+    }
+  }
+
+  /**
+   * Evaluator that accumulates values into a HyperLogLog sketch and returns the serialized
+   * sketch (BINARY) from both terminatePartial() and terminate().
+   */
+  public static class HyperLogLogEvaluator extends GenericUDAFEvaluator {
+
+    private static final long NANOS_PER_SECOND = 1000000000L;
+
+    ObjectInspector inputOI;
+    WritableBinaryObjectInspector partialOI;
+    ByteArrayOutputStream output = new ByteArrayOutputStream();
+
+    /**
+     * All modes return BINARY columns.
+     *
+     * PARTIAL1 and COMPLETE are the two modes in which iterate() receives the original
+     * rows, so both must remember the inspector of the raw argument. PARTIAL2 and FINAL
+     * only merge serialized sketches.
+     */
+    @Override
+    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
+        throws HiveException {
+      super.init(m, parameters);
+      partialOI = PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
+      switch (m) {
+      case PARTIAL1:
+      case COMPLETE:
+        inputOI = parameters[0];
+        return partialOI;
+      case PARTIAL2:
+      case FINAL:
+        return partialOI;
+      default:
+        throw new IllegalArgumentException("Unknown UDAF mode " + m);
+      }
+    }
+
+    @Override
+    public AggregationBuffer getNewAggregationBuffer() throws HiveException {
+      return new HyperLogLogBuffer();
+    }
+
+    /**
+     * Adds one input value to the sketch; NULLs are ignored.
+     *
+     * Each value is routed through one of the sketch's typed add methods so that it is
+     * hashed before it reaches the registers. Values without a dedicated branch are
+     * Java-serialized and added as bytes.
+     */
+    @Override
+    public void iterate(AggregationBuffer agg, Object[] args)
+        throws HiveException {
+      if (args[0] == null) {
+        return;
+      }
+      HyperLogLog hll = ((HyperLogLogBuffer)agg).hll;
+      // should use BinarySortableSerDe, perhaps
+      Object val = ObjectInspectorUtils.copyToStandardJavaObject(args[0], inputOI);
+      try {
+        if (val instanceof Boolean) {
+          hll.addInt(((Boolean) val).booleanValue() ? 1 : 0);
+        } else if (val instanceof Byte || val instanceof Short) {
+          // widen rather than add(hashCode()): add() takes an already-hashed value
+          hll.addInt(((Number) val).intValue());
+        } else if (val instanceof Character) {
+          hll.addInt(((Character) val).charValue());
+        } else if (val instanceof Integer) {
+          hll.addInt(((Integer) val).intValue());
+        } else if (val instanceof Long) {
+          hll.addLong(((Long) val).longValue());
+        } else if (val instanceof Float) {
+          hll.addFloat(((Float) val).floatValue());
+        } else if (val instanceof Double) {
+          hll.addDouble((Double)val);
+        } else if (val instanceof String) {
+          hll.addString(val.toString());
+        } else if (val instanceof HiveDecimal) {
+          hll.addToEstimator((HiveDecimal)val);
+        } else if (val instanceof Date) {
+          hll.addInt(((Date)val).toEpochDay());
+        } else if (val instanceof Timestamp) {
+          // seconds plus nanos, so timestamps that differ below a millisecond stay distinct
+          Timestamp ts = (Timestamp) val;
+          hll.addLong(ts.toEpochSecond() * NANOS_PER_SECOND + ts.getNanos());
+        } else if (val instanceof HiveIntervalDayTime) {
+          HiveIntervalDayTime interval = (HiveIntervalDayTime) val;
+          hll.addLong(interval.getTotalSeconds() * NANOS_PER_SECOND + interval.getNanos());
+        } else if (val instanceof HiveBaseChar) {
+          hll.addString(((HiveBaseChar)val).toString());
+        } else if (val instanceof byte[]) {
+          hll.addBytes((byte[]) val);
+        } else {
+          /* potential multi-key option (does this ever get used?) */
+          output.reset();
+          try (ObjectOutputStream out = new ObjectOutputStream(output)) {
+            out.writeObject(val);
+            // make sure buffered bytes reach 'output' before it is read below
+            out.flush();
+          }
+          hll.addBytes(output.toByteArray());
+        }
+      } catch(IOException ioe) {
+        throw new HiveException(ioe);
+      }
+    }
+
+    @Override
+    public Object terminatePartial(AggregationBuffer agg) throws HiveException {
+      return serialize(((HyperLogLogBuffer)agg).hll);
+    }
+
+    @Override
+    public void merge(AggregationBuffer agg, Object partial) throws HiveException {
+      if (partial == null) {
+        return;
+      }
+      final BytesWritable bw = partialOI.getPrimitiveWritableObject(partial);
+      HyperLogLog hll = ((HyperLogLogBuffer)agg).hll;
+      merge(hll, bw);
+    }
+
+    /** Deserializes the sketch carried by bw and folds it into hll. */
+    protected void merge(HyperLogLog hll, BytesWritable bw) throws HiveException {
+      // getBytes() can be longer than the payload, so the read is bounded by getLength()
+      try (ByteArrayInputStream input =
+          new ByteArrayInputStream(bw.getBytes(), 0, bw.getLength())) {
+        hll.merge(HyperLogLogUtils.deserializeHLL(input));
+      } catch (IOException ioe) {
+        throw new HiveException(ioe);
+      }
+    }
+
+    @Override
+    public void reset(AggregationBuffer agg) throws HiveException {
+      ((HyperLogLogBuffer)agg).reset();
+    }
+
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      return serialize(((HyperLogLogBuffer)agg).hll);
+    }
+
+    /** Serializes hll through the reusable output buffer into a fresh BytesWritable. */
+    private BytesWritable serialize(HyperLogLog hll) throws HiveException {
+      output.reset();
+      try {
+        HyperLogLogUtils.serializeHLL(output, hll);
+      } catch(IOException ioe) {
+        throw new HiveException(ioe);
+      }
+      return new BytesWritable(output.toByteArray());
+    }
+  }
+
+  @Override
+  public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo info)
+      throws SemanticException {
+    return getEvaluator(info.getParameters());
+  }
+
+  /**
+   * Evaluator handed out by this resolver: exchanges serialized sketches between stages
+   * but returns the estimated cardinality as a BIGINT from FINAL and COMPLETE.
+   */
+  public static final class CountApproximateDistinctEvaluator extends HyperLogLogEvaluator {
+    @Override
+    public ObjectInspector init(Mode m, ObjectInspector[] parameters)
+        throws HiveException {
+      ObjectInspector hyperloglog = super.init(m, parameters);
+      if(m == Mode.FINAL || m == Mode.COMPLETE) {
+        return PrimitiveObjectInspectorFactory.writableLongObjectInspector;
+      }
+      return hyperloglog;
+    }
+
+    @Override
+    public Object terminate(AggregationBuffer agg) throws HiveException {
+      HyperLogLog hll = ((HyperLogLogBuffer)agg).hll;
+      return new LongWritable(hll.count());
+    }
+  }
+
+  /**
+   * Validates the argument list and returns the counting evaluator.
+   *
+   * @throws UDFArgumentTypeException if the function is not called with exactly one
+   *         argument, or if that argument is neither a primitive nor a struct
+   */
+  @Override
+  public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) throws SemanticException {
+    if (parameters.length != 1) {
+      throw new UDFArgumentTypeException(parameters.length - 1,
+          "Function only takes 1 parameter");
+    } else if (parameters[0].getCategory() != ObjectInspector.Category.PRIMITIVE
+        && parameters[0].getCategory() != ObjectInspector.Category.STRUCT) {
+      throw new UDFArgumentTypeException(0,
+        "Only primitive/struct rows are accepted but "
+        + parameters[0].getTypeName() + " was passed.");
+    }
+    return new CountApproximateDistinctEvaluator();
+  }
+}
diff --git ql/src/test/queries/clientpositive/approx_distinct.q ql/src/test/queries/clientpositive/approx_distinct.q
new file mode 100644
index 0000000..6ab4875
--- /dev/null
+++ ql/src/test/queries/clientpositive/approx_distinct.q
@@ -0,0 +1,76 @@
+
+
+create temporary table random_types_table (
+cboolean boolean,
+cint int,
+cbigint bigint,
+cfloat float,
+cdouble double,
+cdecimal64 decimal(7,2),
+cdecimal128 decimal(38,18),
+cdate date,
+ctimestamp timestamp,
+cstring string,
+cvarchar varchar(3),
+cchar char(3)
+) stored as orc;
+
+select 'expect 1', approx_distinct(1);
+select 'expect 1', approx_distinct(1.0);
+select 'expect 1', approx_distinct(false); +select 'expect 1', approx_distinct('X'); +select 'expect 1', approx_distinct(current_date); +select 'expect 1', approx_distinct(current_timestamp); +select 'expect 1', approx_distinct(1.0BD); +select 'expect 1', approx_distinct(INTERVAL '1' DAY); + +-- No rows (all 0) + +select 'expect 0', approx_distinct(cboolean) from random_types_table; +select 'expect 0', approx_distinct(cint) from random_types_table; +select 'expect 0', approx_distinct(cbigint) from random_types_table; +select 'expect 0', approx_distinct(cfloat) from random_types_table; +select 'expect 0', approx_distinct(cdouble) from random_types_table; +select 'expect 0', approx_distinct(cdecimal64) from random_types_table; +select 'expect 0', approx_distinct(cdecimal128) from random_types_table; +select 'expect 0', approx_distinct(cdate) from random_types_table; +select 'expect 0', approx_distinct(ctimestamp) from random_types_table; +select 'expect 0', approx_distinct(cstring) from random_types_table; +select 'expect 0', approx_distinct(cvarchar) from random_types_table; +select 'expect 0', approx_distinct(cchar) from random_types_table; + +-- 1 row twice (all 1) + +insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C'); +insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C'); + +select 'expect 1', approx_distinct(cboolean) from random_types_table; +select 'expect 1', approx_distinct(cint) from random_types_table; +select 'expect 1', approx_distinct(cbigint) from random_types_table; +select 'expect 1', approx_distinct(cfloat) from random_types_table; +select 'expect 1', approx_distinct(cdouble) from random_types_table; +select 'expect 1', approx_distinct(cdecimal64) from random_types_table; +select 'expect 1', approx_distinct(cdecimal128) from random_types_table; +select 'expect 1', approx_distinct(cdate) from 
random_types_table; +select 'expect 1', approx_distinct(ctimestamp) from random_types_table; +select 'expect 1', approx_distinct(cstring) from random_types_table; +select 'expect 1', approx_distinct(cvarchar) from random_types_table; +select 'expect 1', approx_distinct(cchar) from random_types_table; + + + +insert into random_types_table values (false, 2, 2, 2.0, 2.0, 2.0BD, 2.0BD, '1999-12-31', '1999-12-31 00:00:01', 'X', 'Y', 'Z'); + +-- 2 unique rows (all 2) +select 'expect 2', approx_distinct(cboolean) from random_types_table; +select 'expect 2', approx_distinct(cint) from random_types_table; +select 'expect 2', approx_distinct(cbigint) from random_types_table; +select 'expect 2', approx_distinct(cfloat) from random_types_table; +select 'expect 2', approx_distinct(cdouble) from random_types_table; +select 'expect 2', approx_distinct(cdecimal64) from random_types_table; +select 'expect 2', approx_distinct(cdecimal128) from random_types_table; +select 'expect 2', approx_distinct(cdate) from random_types_table; +select 'expect 2', approx_distinct(ctimestamp) from random_types_table; +select 'expect 2', approx_distinct(cstring) from random_types_table; +select 'expect 2', approx_distinct(cvarchar) from random_types_table; +select 'expect 2', approx_distinct(cchar) from random_types_table; diff --git ql/src/test/results/clientpositive/llap/approx_distinct.q.out ql/src/test/results/clientpositive/llap/approx_distinct.q.out new file mode 100644 index 0000000..e727819 --- /dev/null +++ ql/src/test/results/clientpositive/llap/approx_distinct.q.out @@ -0,0 +1,490 @@ +PREHOOK: query: create temporary table random_types_table ( +cboolean boolean, +cint int, +cbigint bigint, +cfloat float, +cdouble double, +cdecimal64 decimal(7,2), +cdecimal128 decimal(38,18), +cdate date, +ctimestamp timestamp, +cstring string, +cvarchar varchar(3), +cchar char(3) +) stored as orc +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@random_types_table 
+POSTHOOK: query: create temporary table random_types_table ( +cboolean boolean, +cint int, +cbigint bigint, +cfloat float, +cdouble double, +cdecimal64 decimal(7,2), +cdecimal128 decimal(38,18), +cdate date, +ctimestamp timestamp, +cstring string, +cvarchar varchar(3), +cchar char(3) +) stored as orc +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@random_types_table +PREHOOK: query: select 'expect 1', approx_distinct(1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(1.0) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(1.0) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(false) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(false) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct('X') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct('X') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(current_date) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', 
approx_distinct(current_date) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(current_timestamp) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(current_timestamp) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(1.0BD) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(1.0BD) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(INTERVAL '1' DAY) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(INTERVAL '1' DAY) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 0', approx_distinct(cboolean) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cboolean) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cint) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cint) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 
0 +PREHOOK: query: select 'expect 0', approx_distinct(cbigint) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cbigint) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cfloat) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cfloat) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cdouble) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cdouble) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cdecimal64) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cdecimal64) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cdecimal128) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cdecimal128) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 
'expect 0', approx_distinct(cdate) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cdate) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(ctimestamp) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(ctimestamp) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cstring) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cstring) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cvarchar) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cvarchar) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: select 'expect 0', approx_distinct(cchar) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 0', approx_distinct(cchar) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 0 0 +PREHOOK: query: insert into random_types_table values (true, 1, 1, 
1.0, 1.0, 1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@random_types_table +POSTHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@random_types_table +POSTHOOK: Lineage: random_types_table.cbigint SCRIPT [] +POSTHOOK: Lineage: random_types_table.cboolean SCRIPT [] +POSTHOOK: Lineage: random_types_table.cchar SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdate SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdecimal128 SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdecimal64 SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdouble SCRIPT [] +POSTHOOK: Lineage: random_types_table.cfloat SCRIPT [] +POSTHOOK: Lineage: random_types_table.cint SCRIPT [] +POSTHOOK: Lineage: random_types_table.cstring SCRIPT [] +POSTHOOK: Lineage: random_types_table.ctimestamp SCRIPT [] +POSTHOOK: Lineage: random_types_table.cvarchar SCRIPT [] +PREHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@random_types_table +POSTHOOK: query: insert into random_types_table values (true, 1, 1, 1.0, 1.0, 1.0BD, 1.0BD, '2000-01-01', '2000-01-01 00:00:01', 'A', 'B', 'C') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@random_types_table +POSTHOOK: Lineage: random_types_table.cbigint SCRIPT [] +POSTHOOK: Lineage: random_types_table.cboolean SCRIPT [] +POSTHOOK: Lineage: random_types_table.cchar SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdate SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdecimal128 SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdecimal64 SCRIPT 
[] +POSTHOOK: Lineage: random_types_table.cdouble SCRIPT [] +POSTHOOK: Lineage: random_types_table.cfloat SCRIPT [] +POSTHOOK: Lineage: random_types_table.cint SCRIPT [] +POSTHOOK: Lineage: random_types_table.cstring SCRIPT [] +POSTHOOK: Lineage: random_types_table.ctimestamp SCRIPT [] +POSTHOOK: Lineage: random_types_table.cvarchar SCRIPT [] +PREHOOK: query: select 'expect 1', approx_distinct(cboolean) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cboolean) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cint) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cint) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cbigint) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cbigint) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cfloat) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cfloat) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cdouble) from random_types_table +PREHOOK: 
type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cdouble) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cdecimal64) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cdecimal64) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cdecimal128) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cdecimal128) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cdate) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cdate) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(ctimestamp) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(ctimestamp) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cstring) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: 
default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cstring) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cvarchar) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cvarchar) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: select 'expect 1', approx_distinct(cchar) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 1', approx_distinct(cchar) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 1 1 +PREHOOK: query: insert into random_types_table values (false, 2, 2, 2.0, 2.0, 2.0BD, 2.0BD, '1999-12-31', '1999-12-31 00:00:01', 'X', 'Y', 'Z') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@random_types_table +POSTHOOK: query: insert into random_types_table values (false, 2, 2, 2.0, 2.0, 2.0BD, 2.0BD, '1999-12-31', '1999-12-31 00:00:01', 'X', 'Y', 'Z') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@random_types_table +POSTHOOK: Lineage: random_types_table.cbigint SCRIPT [] +POSTHOOK: Lineage: random_types_table.cboolean SCRIPT [] +POSTHOOK: Lineage: random_types_table.cchar SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdate SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdecimal128 SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdecimal64 SCRIPT [] +POSTHOOK: Lineage: random_types_table.cdouble SCRIPT [] +POSTHOOK: Lineage: 
random_types_table.cfloat SCRIPT [] +POSTHOOK: Lineage: random_types_table.cint SCRIPT [] +POSTHOOK: Lineage: random_types_table.cstring SCRIPT [] +POSTHOOK: Lineage: random_types_table.ctimestamp SCRIPT [] +POSTHOOK: Lineage: random_types_table.cvarchar SCRIPT [] +PREHOOK: query: select 'expect 2', approx_distinct(cboolean) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cboolean) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cint) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cint) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cbigint) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cbigint) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cfloat) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cfloat) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cdouble) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern 
was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cdouble) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cdecimal64) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cdecimal64) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cdecimal128) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cdecimal128) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cdate) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cdate) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(ctimestamp) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(ctimestamp) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cstring) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: 
query: select 'expect 2', approx_distinct(cstring) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cvarchar) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cvarchar) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2 +PREHOOK: query: select 'expect 2', approx_distinct(cchar) from random_types_table +PREHOOK: type: QUERY +PREHOOK: Input: default@random_types_table +#### A masked pattern was here #### +POSTHOOK: query: select 'expect 2', approx_distinct(cchar) from random_types_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@random_types_table +#### A masked pattern was here #### +expect 2 2