diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 42bc5df..118e1f6 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -813,6 +813,8 @@ schq_materialized.q,\ schq_analyze.q,\ schq_ingest.q,\ + sketches_hll.q,\ + sketches_theta.q,\ table_access_keys_stats.q,\ temp_table_llap_partitioned.q,\ tez_bmj_schema_evolution.q,\ diff --git pom.xml pom.xml index a005f35..165b526 100644 --- pom.xml +++ pom.xml @@ -225,6 +225,7 @@ 2.2.4 1.2 2.0.1 + 1.0.0-incubating diff --git ql/pom.xml ql/pom.xml index 8b0c02b..efad8b2 100644 --- ql/pom.xml +++ ql/pom.xml @@ -309,6 +309,11 @@ test + org.apache.datasketches + datasketches-hive + ${datasketches.version} + + com.lmax disruptor ${disruptor.version} @@ -987,6 +992,7 @@ io.dropwizard.metrics:metrics-jvm io.dropwizard.metrics:metrics-json com.zaxxer:HikariCP + org.apache.datasketches:* @@ -1014,6 +1020,10 @@ com.google.guava org.apache.hive.com.google.guava + + org.apache.datasketches + org.apache.hive.org.apache.datasketches + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java new file mode 100644 index 0000000..6522d9c --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java @@ -0,0 +1,218 @@ +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; + +public class DataSketchesFunctions { + + private static final String DATA_TO_SKETCH = "datatosketch"; + private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "sketchToEstimateWithErrorBounds"; + // FIXME: consider to rename it to simply "estimate" or "evaluate" - in case of the counting sketches the "sketchto..." 
doesn't add value + private static final String SKETCH_TO_ESTIMATE = "sketchToEstimate"; + private static final String SKETCH_TO_STRING = "sketchToString"; + // FIXME: probably use simply "union" instead of "unionSketch"? + private static final String UNION_SKETCH = "unionSketch"; + private static final String GET_N = "getN"; + private static final String GET_CDF = "getCdf"; + private static final String GET_PMF = "getPmf"; + private static final String GET_QUANTILES = "GetQuantiles"; + private static final String GET_QUANTILE = "GetQuantile"; + private static final String GET_RANK = "getRank"; + private static final String INTERSECT_SKETCH = "intersection"; + private static final String EXCLUDE_SKETCH = "exclude"; + private static final String GET_K = "getK"; + private static final String GET_FREQUENT_ITEMS = "getFrequentItems"; + private static final String T_TEST = "TTest"; + private static final String SKETCH_TO_MEANS = "sketchtomeans"; + private static final String SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES = "sketchtonumberofretainedentries"; + private static final String SKETCH_TO_QUANTILES_SKETCH = "sketchToQuantilesSketch"; + private static final String SKETCH_TO_VALUES = "sketchToValues"; + private static final String SKETCH_TO_VARIANCES = "sketchToVariances"; + private static final String SKETCH_TO_PERCENTILE = "sketchToPercentile"; + private static final String UNION_SKETCH1 = "unionSketch1"; + private static final String INTERSECT_SKETCH1 = "intersect"; + + private final Registry system; + + public DataSketchesFunctions(Registry system) { + this.system = system; + } + + public static void register(Registry system) { + DataSketchesFunctions dsf = new DataSketchesFunctions(system); + // FIXME: which prefix should be used here: "approx", "ds", ... or something else? 
+ String prefix = "ds"; + dsf.registerHll(prefix); + dsf.registerCpc(prefix); + dsf.registerKll(prefix); + dsf.registerTheta(prefix); + dsf.registerTuple(prefix); + dsf.registerQuantiles(prefix); + dsf.registerFrequencies(prefix); + } + + private void registerHll(String prefix) { + String p = prefix + "_hll_"; + registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class, + p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); + registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1); + registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH); + } + + private void registerCpc(String prefix) { + String p = prefix + "_cpc_"; + registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF + registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class, + p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); + // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF + registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1); + registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH); + } + + private void registerKll(String prefix) { + String p = prefix + "_kll_"; + registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING); + 
// registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH); + + registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N); + registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF); + registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF); + registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES); + registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE); + registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK); + } + + private void registerTheta(String prefix) { + String p = prefix + "_theta_"; + registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1); + registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1); + registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH); + registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH); + + } + + private void registerTuple(String prefix) { + registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble"); + registerTupleDoubleSummary(prefix + "_tuple_doublesummary"); + } + + private void registerTupleArrayOfDoubles(String string) { + String p = string + "_"; + registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? 
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class, + p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class, + p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class, + p + SKETCH_TO_QUANTILES_SKETCH); + registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES); + } + + private void registerTupleDoubleSummary(String string) { + String p = string + "_"; + registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? 
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE); + } + + private void registerQuantiles(String prefix) { + registerQuantilesString(prefix + "_quantile"); + registerQuantilesDoubles(prefix + "_quantile"); + } + + private void registerFrequencies(String prefix) { + String p = prefix + "_"; + registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.frequencies.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH); + registerUDTF(org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class, + p + GET_FREQUENT_ITEMS); // use p (prefix + "_") like the sibling registrations; bare prefix gave "dsgetFrequentItems" + } + + private void registerQuantilesString(String prefix) { + String p = prefix + "_strings_"; + registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N); + registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K); + 
registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF); + registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES); + } + + private void registerQuantilesDoubles(String prefix) { + String p = prefix + "_doubles_"; + registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N); + registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K); + registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF); + registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES); + } + + private void registerUDF(Class<? extends UDF> udfClass, String name) { + system.registerUDF(name, udfClass, false); + } + + private void registerUDAF(Class<? extends GenericUDAFResolver2> udafClass, String name) { + try { + system.registerGenericUDAF(name, udafClass.newInstance()); // an instance of the resolver class is passed to the registry + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException("Unable to register: " + name, e); + } + } + + private void registerUDTF(Class<? extends GenericUDTF> udtfClass, String 
name) { + system.registerGenericUDTF(name, udtfClass); + } + + private String getUDFName(Class<?> clazz) { + Description desc = getDescription(clazz); + String name = desc.name(); // validate the raw annotation value before normalising it + if (name == null || name.isEmpty()) { + throw new RuntimeException("The UDF class (" + clazz.getName() + ") doesn't have a valid name"); + } + return name.toLowerCase(java.util.Locale.ROOT); + } + + private Description getDescription(Class<?> clazz) { + Description desc = clazz.getAnnotation(Description.class); + if (desc == null) { + throw new RuntimeException("no Description annotation on class: " + clazz.getName()); + } + return desc; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index db5ee8d..dc3781a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -140,8 +140,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -475,6 +473,7 @@ system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont()); system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc()); + DataSketchesFunctions.register(system); // Generic UDFs system.registerGenericUDF("reflect", GenericUDFReflect.class); diff --git ql/src/test/queries/clientpositive/sketches_hll.q ql/src/test/queries/clientpositive/sketches_hll.q new file mode 100644 index 0000000..84408e9 --- /dev/null +++ 
ql/src/test/queries/clientpositive/sketches_hll.q @@ -0,0 +1,16 @@ +-- prepare input data +create temporary table sketch_input (id int, category char(1)); +insert into table sketch_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b'); + +-- build sketches per category +create temporary table sketch_intermediate (category char(1), sketch binary); +insert into sketch_intermediate select category, ds_hll_dataToSketch(id) from sketch_input group by category; + +-- get unique count estimates per category +select category, ds_hll_sketchToEstimate(sketch) from sketch_intermediate; + + +-- union sketches across categories and get overall unique count estimate +select ds_hll_sketchToEstimate(ds_hll_unionSketch(sketch)) from sketch_intermediate; diff --git ql/src/test/queries/clientpositive/sketches_theta.q ql/src/test/queries/clientpositive/sketches_theta.q new file mode 100644 index 0000000..4c2124c --- /dev/null +++ ql/src/test/queries/clientpositive/sketches_theta.q @@ -0,0 +1,33 @@ +-- see here: https://datasketches.apache.org/docs/Theta/ThetaHiveUDFs.html + +create temporary table theta_input (id int, category char(1)); +insert into table theta_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b'); + +create temporary table sketch_intermediate (category char(1), sketch binary); +insert into sketch_intermediate select category, ds_theta_datatosketch(id) from theta_input group by category; + +select category, ds_theta_sketchtoestimate(sketch) from sketch_intermediate; + +select ds_theta_sketchtoestimate(ds_theta_unionSketch(sketch)) from sketch_intermediate; + + + +create temporary table sketch_input (id1 int, id2 int); +insert into table 
sketch_input values + (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20); + +create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary); + +insert into sketch_intermediate2 select ds_theta_datatosketch(id1), ds_theta_datatosketch(id2) from sketch_input; + +select + ds_theta_sketchtoestimate(sketch1), + ds_theta_sketchtoestimate(sketch2), + ds_theta_sketchtoestimate(ds_theta_unionSketch1(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_intersect(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_exclude(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_exclude(sketch2, sketch1)) +from sketch_intermediate2; + diff --git ql/src/test/results/clientpositive/llap/sketches_hll.q.out ql/src/test/results/clientpositive/llap/sketches_hll.q.out new file mode 100644 index 0000000..6512154 --- /dev/null +++ ql/src/test/results/clientpositive/llap/sketches_hll.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: create temporary table sketch_input (id int, category char(1)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_input +POSTHOOK: query: create temporary table sketch_input (id int, category char(1)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_input +PREHOOK: query: insert into table sketch_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input +POSTHOOK: query: insert into table sketch_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: 
Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.category SCRIPT [] +POSTHOOK: Lineage: sketch_input.id SCRIPT [] +PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_intermediate +PREHOOK: query: insert into sketch_intermediate select category, ds_hll_dataToSketch(id) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: insert into sketch_intermediate select category, ds_hll_dataToSketch(id) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: default@sketch_intermediate +POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ] +POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ] +PREHOOK: query: select category, ds_hll_sketchToEstimate(sketch) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select category, ds_hll_sketchToEstimate(sketch) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +a 10.000000223517425 +b 10.000000223517425 +PREHOOK: query: select ds_hll_sketchToEstimate(ds_hll_unionSketch(sketch)) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select 
ds_hll_sketchToEstimate(ds_hll_unionSketch(sketch)) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +15.000000521540663 diff --git ql/src/test/results/clientpositive/llap/sketches_theta.q.out ql/src/test/results/clientpositive/llap/sketches_theta.q.out new file mode 100644 index 0000000..7916e5d --- /dev/null +++ ql/src/test/results/clientpositive/llap/sketches_theta.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: create temporary table theta_input (id int, category char(1)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@theta_input +POSTHOOK: query: create temporary table theta_input (id int, category char(1)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@theta_input +PREHOOK: query: insert into table theta_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@theta_input +POSTHOOK: query: insert into table theta_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@theta_input +POSTHOOK: Lineage: theta_input.category SCRIPT [] +POSTHOOK: Lineage: theta_input.id SCRIPT [] +PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +POSTHOOK: type: 
CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_intermediate +PREHOOK: query: insert into sketch_intermediate select category, ds_theta_datatosketch(id) from theta_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@theta_input +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: insert into sketch_intermediate select category, ds_theta_datatosketch(id) from theta_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@theta_input +POSTHOOK: Output: default@sketch_intermediate +POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(theta_input)theta_input.FieldSchema(name:category, type:char(1), comment:null), ] +POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(theta_input)theta_input.FieldSchema(name:id, type:int, comment:null), ] +PREHOOK: query: select category, ds_theta_sketchtoestimate(sketch) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select category, ds_theta_sketchtoestimate(sketch) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +a 10.0 +b 10.0 +PREHOOK: query: select ds_theta_sketchtoestimate(ds_theta_unionSketch(sketch)) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select ds_theta_sketchtoestimate(ds_theta_unionSketch(sketch)) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +15.0 +PREHOOK: query: create temporary table sketch_input (id1 int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_input +POSTHOOK: query: create temporary table sketch_input (id1 int, id2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: 
Output: default@sketch_input +PREHOOK: query: insert into table sketch_input values + (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input +POSTHOOK: query: insert into table sketch_input values + (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.id1 SCRIPT [] +POSTHOOK: Lineage: sketch_input.id2 SCRIPT [] +PREHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_intermediate2 +POSTHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_intermediate2 +PREHOOK: query: insert into sketch_intermediate2 select ds_theta_datatosketch(id1), ds_theta_datatosketch(id2) from sketch_input +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +PREHOOK: Output: default@sketch_intermediate2 +POSTHOOK: query: insert into sketch_intermediate2 select ds_theta_datatosketch(id1), ds_theta_datatosketch(id2) from sketch_input +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: default@sketch_intermediate2 +POSTHOOK: Lineage: sketch_intermediate2.sketch1 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id1, type:int, comment:null), ] +POSTHOOK: Lineage: sketch_intermediate2.sketch2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id2, type:int, comment:null), ] +PREHOOK: query: select + ds_theta_sketchtoestimate(sketch1), + ds_theta_sketchtoestimate(sketch2), + ds_theta_sketchtoestimate(ds_theta_unionSketch1(sketch1, sketch2)), + 
ds_theta_sketchtoestimate(ds_theta_intersect(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_exclude(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_exclude(sketch2, sketch1)) +from sketch_intermediate2 +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate2 +#### A masked pattern was here #### +POSTHOOK: query: select + ds_theta_sketchtoestimate(sketch1), + ds_theta_sketchtoestimate(sketch2), + ds_theta_sketchtoestimate(ds_theta_unionSketch1(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_intersect(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_exclude(sketch1, sketch2)), + ds_theta_sketchtoestimate(ds_theta_exclude(sketch2, sketch1)) +from sketch_intermediate2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate2 +#### A masked pattern was here #### +10.0 10.0 15.0 5.0 5.0 5.0