diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 9d27b8d..3f6d42c 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -817,6 +817,8 @@ schq_materialized.q,\ schq_analyze.q,\ schq_ingest.q,\ + sketches_hll.q,\ + sketches_theta.q,\ table_access_keys_stats.q,\ temp_table_llap_partitioned.q,\ tez_bmj_schema_evolution.q,\ diff --git pom.xml pom.xml index d804a19..31f28c1 100644 --- pom.xml +++ pom.xml @@ -225,6 +225,7 @@ 2.2.4 1.2 2.0.1 + 1.0.0-incubating diff --git ql/pom.xml ql/pom.xml index d5c83b6..7d2b846 100644 --- ql/pom.xml +++ ql/pom.xml @@ -313,6 +313,11 @@ test + org.apache.datasketches + datasketches-hive + ${datasketches.version} + + com.lmax disruptor ${disruptor.version} @@ -991,6 +996,7 @@ io.dropwizard.metrics:metrics-jvm io.dropwizard.metrics:metrics-json com.zaxxer:HikariCP + org.apache.datasketches:* @@ -1018,6 +1024,10 @@ com.google.guava org.apache.hive.com.google.guava + + org.apache.datasketches + org.apache.hive.org.apache.datasketches + diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java new file mode 100644 index 0000000..b9d265f --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java @@ -0,0 +1,221 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.exec; + +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; + +/** + * Registers functions from the DataSketches library as builtin functions. + * + * In an effort to show a more consistent + */ +public class DataSketchesFunctions { + + private static final String DATA_TO_SKETCH = "sketch"; + private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "estimate_bounds"; + private static final String SKETCH_TO_ESTIMATE = "estimate"; + private static final String SKETCH_TO_STRING = "stringify"; + private static final String UNION_SKETCH = "union"; + private static final String UNION_SKETCH1 = "union_f"; + private static final String GET_N = "n"; + private static final String GET_CDF = "cdf"; + private static final String GET_PMF = "pmf"; + private static final String GET_QUANTILES = "quantiles"; + private static final String GET_QUANTILE = "quantile"; + private static final String GET_RANK = "rank"; + private static final String INTERSECT_SKETCH = "intersect"; + private static final String INTERSECT_SKETCH1 = "intersect_f"; + private static final String EXCLUDE_SKETCH = "exclude"; + private static final String GET_K = "k"; + private static final String GET_FREQUENT_ITEMS = "frequent_items"; + private static final String T_TEST = "ttest"; + private static final String SKETCH_TO_MEANS = "means"; + private static final String SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES = "n_retained"; + private static final String SKETCH_TO_QUANTILES_SKETCH = "quantiles_sketch"; + private static final String SKETCH_TO_VALUES = "values"; + private static final String SKETCH_TO_VARIANCES = "variances"; + private static final String SKETCH_TO_PERCENTILE = "percentile"; + + private final Registry system; + + public DataSketchesFunctions(Registry system) { + this.system = system; + } + + public static void register(Registry system) { + DataSketchesFunctions dsf = new DataSketchesFunctions(system); + String prefix = "ds"; + dsf.registerHll(prefix); + dsf.registerCpc(prefix); + dsf.registerKll(prefix); + dsf.registerTheta(prefix); + dsf.registerTuple(prefix); + dsf.registerQuantiles(prefix); + dsf.registerFrequencies(prefix); + } + + private void registerHll(String prefix) { + String p = prefix + "_hll_"; + registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class, + p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); + registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1); + registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH); + } + + private void registerCpc(String prefix) { + String p = prefix + "_cpc_"; + registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF + registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class, + p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); + // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF + registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1); + registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH); + } + + private void registerKll(String prefix) { + String p = prefix + "_kll_"; + registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING); + // registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH); + + registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N); + registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF); + registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF); + registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES); + registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE); + registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK); + } + + private void registerTheta(String prefix) { + String p = prefix + "_theta_"; + registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1); + registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1); + registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH); + registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH); + + } + + private void registerTuple(String prefix) { + registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble"); + registerTupleDoubleSummary(prefix + "_tuple_doublesummary"); + } + + private void registerTupleArrayOfDoubles(String string) { + String p = string + "_"; + registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class, + p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class, + p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class, + p + SKETCH_TO_QUANTILES_SKETCH); + registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES); + registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES); + } + + private void registerTupleDoubleSummary(String string) { + String p = string + "_"; + registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE); + registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE); + } + + private void registerQuantiles(String prefix) { + registerQuantilesString(prefix + "_quantile"); + registerQuantilesDoubles(prefix + "_quantile"); + } + + private void registerFrequencies(String prefix) { + String p = prefix + "_freq_"; + registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.frequencies.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH); + registerUDTF(org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class, + p + GET_FREQUENT_ITEMS); + } + + private void registerQuantilesString(String prefix) { + String p = prefix + "_strings_"; + registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N); + registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K); + registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF); + registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES); + } + + private void registerQuantilesDoubles(String prefix) { + String p = prefix + "_doubles_"; + registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH); + registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N); + registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K); + registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF); + registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE); + registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES); + } + + private void registerUDF(Class udfClass, String name) { + system.registerUDF(name, udfClass, false); + } + + private void registerUDAF(Class udafClass, String name) { + try { + system.registerGenericUDAF(name, udafClass.newInstance()); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException("Unable to register: " + name, e); + } + } + + private void registerUDTF(Class udtfClass, String name) { + system.registerGenericUDTF(name, udtfClass); + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index db5ee8d..dc3781a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -140,8 +140,6 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping; import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils; -import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo; -import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -475,6 +473,7 @@ system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont()); system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc()); + DataSketchesFunctions.register(system); // Generic UDFs system.registerGenericUDF("reflect", GenericUDFReflect.class); diff --git ql/src/test/queries/clientpositive/sketches_hll.q ql/src/test/queries/clientpositive/sketches_hll.q new file mode 100644 index 0000000..56467a6 --- /dev/null +++ ql/src/test/queries/clientpositive/sketches_hll.q @@ -0,0 +1,16 @@ +-- prepare input data +create temporary table sketch_input (id int, category char(1)); +insert into table sketch_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b'); + +-- build sketches per category +create temporary table sketch_intermediate (category char(1), sketch binary); +insert into sketch_intermediate select category, ds_hll_sketch(id) from sketch_input group by category; + +-- get unique count estimates per category +select category, ds_hll_estimate(sketch) from sketch_intermediate; + + +-- union sketches across categories and get overall unique count estimate +select ds_hll_estimate(ds_hll_union(sketch)) from sketch_intermediate; diff --git ql/src/test/queries/clientpositive/sketches_theta.q ql/src/test/queries/clientpositive/sketches_theta.q new file mode 100644 index 0000000..6ab7278 --- /dev/null +++ ql/src/test/queries/clientpositive/sketches_theta.q @@ -0,0 +1,33 @@ +-- see here: https://datasketches.apache.org/docs/Theta/ThetaHiveUDFs.html + +create temporary table theta_input (id int, category char(1)); +insert into table theta_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b'); + +create temporary table sketch_intermediate (category char(1), sketch binary); +insert into sketch_intermediate select category, ds_theta_sketch(id) from theta_input group by category; + +select category, ds_theta_estimate(sketch) from sketch_intermediate; + +select ds_theta_estimate(ds_theta_union(sketch)) from sketch_intermediate; + + + +create temporary table sketch_input (id1 int, id2 int); +insert into table sketch_input values + (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20); + +create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary); + +insert into sketch_intermediate2 select ds_theta_sketch(id1), ds_theta_sketch(id2) from sketch_input; + +select + ds_theta_estimate(sketch1), + ds_theta_estimate(sketch2), + ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)), + ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)), + ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)), + ds_theta_estimate(ds_theta_exclude(sketch2, sketch1)) +from sketch_intermediate2; + diff --git ql/src/test/results/clientpositive/llap/sketches_hll.q.out ql/src/test/results/clientpositive/llap/sketches_hll.q.out new file mode 100644 index 0000000..9ebce86 --- /dev/null +++ ql/src/test/results/clientpositive/llap/sketches_hll.q.out @@ -0,0 +1,59 @@ +PREHOOK: query: create temporary table sketch_input (id int, category char(1)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_input +POSTHOOK: query: create temporary table sketch_input (id int, category char(1)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_input +PREHOOK: query: insert into table sketch_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input +POSTHOOK: query: insert into table sketch_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.category SCRIPT [] +POSTHOOK: Lineage: sketch_input.id SCRIPT [] +PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_intermediate +PREHOOK: query: insert into sketch_intermediate select category, ds_hll_sketch(id) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: insert into sketch_intermediate select category, ds_hll_sketch(id) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: default@sketch_intermediate +POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ] +POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ] +PREHOOK: query: select category, ds_hll_estimate(sketch) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select category, ds_hll_estimate(sketch) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +a 10.000000223517425 +b 10.000000223517425 +PREHOOK: query: select ds_hll_estimate(ds_hll_union(sketch)) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select ds_hll_estimate(ds_hll_union(sketch)) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +15.000000521540663 diff --git ql/src/test/results/clientpositive/llap/sketches_theta.q.out ql/src/test/results/clientpositive/llap/sketches_theta.q.out new file mode 100644 index 0000000..b3ea64d --- /dev/null +++ ql/src/test/results/clientpositive/llap/sketches_theta.q.out @@ -0,0 +1,120 @@ +PREHOOK: query: create temporary table theta_input (id int, category char(1)) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@theta_input +POSTHOOK: query: create temporary table theta_input (id int, category char(1)) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@theta_input +PREHOOK: query: insert into table theta_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@theta_input +POSTHOOK: query: insert into table theta_input values + (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@theta_input +POSTHOOK: Lineage: theta_input.category SCRIPT [] +POSTHOOK: Lineage: theta_input.id SCRIPT [] +PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_intermediate +PREHOOK: query: insert into sketch_intermediate select category, ds_theta_sketch(id) from theta_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@theta_input +PREHOOK: Output: default@sketch_intermediate +POSTHOOK: query: insert into sketch_intermediate select category, ds_theta_sketch(id) from theta_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@theta_input +POSTHOOK: Output: default@sketch_intermediate +POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(theta_input)theta_input.FieldSchema(name:category, type:char(1), comment:null), ] +POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(theta_input)theta_input.FieldSchema(name:id, type:int, comment:null), ] +PREHOOK: query: select category, ds_theta_estimate(sketch) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select category, ds_theta_estimate(sketch) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +a 10.0 +b 10.0 +PREHOOK: query: select ds_theta_estimate(ds_theta_union(sketch)) from sketch_intermediate +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +POSTHOOK: query: select ds_theta_estimate(ds_theta_union(sketch)) from sketch_intermediate +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate +#### A masked pattern was here #### +15.0 +PREHOOK: query: create temporary table sketch_input (id1 int, id2 int) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_input +POSTHOOK: query: create temporary table sketch_input (id1 int, id2 int) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_input +PREHOOK: query: insert into table sketch_input values + (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input +POSTHOOK: query: insert into table sketch_input values + (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.id1 SCRIPT [] +POSTHOOK: Lineage: sketch_input.id2 SCRIPT [] +PREHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_intermediate2 +POSTHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_intermediate2 +PREHOOK: query: insert into sketch_intermediate2 select ds_theta_sketch(id1), ds_theta_sketch(id2) from sketch_input +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_input +PREHOOK: Output: default@sketch_intermediate2 +POSTHOOK: query: insert into sketch_intermediate2 select ds_theta_sketch(id1), ds_theta_sketch(id2) from sketch_input +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: default@sketch_intermediate2 +POSTHOOK: Lineage: sketch_intermediate2.sketch1 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id1, type:int, comment:null), ] +POSTHOOK: Lineage: sketch_intermediate2.sketch2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id2, type:int, comment:null), ] +PREHOOK: query: select + ds_theta_estimate(sketch1), + ds_theta_estimate(sketch2), + ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)), + ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)), + ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)), + ds_theta_estimate(ds_theta_exclude(sketch2, sketch1)) +from sketch_intermediate2 +PREHOOK: type: QUERY +PREHOOK: Input: default@sketch_intermediate2 +#### A masked pattern was here #### +POSTHOOK: query: select + ds_theta_estimate(sketch1), + ds_theta_estimate(sketch2), + ds_theta_estimate(ds_theta_union_f(sketch1, sketch2)), + ds_theta_estimate(ds_theta_intersect_f(sketch1, sketch2)), + ds_theta_estimate(ds_theta_exclude(sketch1, sketch2)), + ds_theta_estimate(ds_theta_exclude(sketch2, sketch1)) +from sketch_intermediate2 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@sketch_intermediate2 +#### A masked pattern was here #### +10.0 10.0 15.0 5.0 5.0 5.0 diff --git ql/src/test/results/clientpositive/show_functions.q.out ql/src/test/results/clientpositive/show_functions.q.out index 0453400..4b38cfb 100644 --- ql/src/test/results/clientpositive/show_functions.q.out +++ ql/src/test/results/clientpositive/show_functions.q.out @@ -105,6 +105,69 @@ degrees dense_rank div +ds_cpc_estimate +ds_cpc_estimate_bounds +ds_cpc_sketch +ds_cpc_stringify +ds_cpc_union +ds_cpc_union_f +ds_freq_frequent_items +ds_freq_sketch +ds_freq_union +ds_hll_estimate +ds_hll_estimate_bounds +ds_hll_sketch +ds_hll_stringify +ds_hll_union +ds_hll_union_f +ds_kll_cdf +ds_kll_n +ds_kll_pmf +ds_kll_quantile +ds_kll_quantiles +ds_kll_rank +ds_kll_sketch +ds_kll_stringify +ds_kll_union +ds_quantile_doubles_cdf +ds_quantile_doubles_k +ds_quantile_doubles_n +ds_quantile_doubles_pmf +ds_quantile_doubles_quantile +ds_quantile_doubles_quantiles +ds_quantile_doubles_sketch +ds_quantile_doubles_stringify +ds_quantile_doubles_union +ds_quantile_strings_cdf +ds_quantile_strings_k +ds_quantile_strings_n +ds_quantile_strings_pmf +ds_quantile_strings_quantile +ds_quantile_strings_quantiles +ds_quantile_strings_sketch +ds_quantile_strings_stringify +ds_quantile_strings_union +ds_theta_estimate +ds_theta_exclude +ds_theta_intersect +ds_theta_intersect_f +ds_theta_sketch +ds_theta_union +ds_theta_union_f +ds_tuple_arrayofdouble_estimate +ds_tuple_arrayofdouble_estimate_bounds +ds_tuple_arrayofdouble_means +ds_tuple_arrayofdouble_n_retained +ds_tuple_arrayofdouble_quantiles_sketch +ds_tuple_arrayofdouble_sketch +ds_tuple_arrayofdouble_ttest +ds_tuple_arrayofdouble_union +ds_tuple_arrayofdouble_values +ds_tuple_arrayofdouble_variances +ds_tuple_doublesummary_estimate +ds_tuple_doublesummary_percentile +ds_tuple_doublesummary_sketch +ds_tuple_doublesummary_union e elt encode @@ -392,6 +455,16 @@ current_database current_date decode +ds_cpc_estimate +ds_hll_estimate +ds_kll_quantile +ds_quantile_doubles_quantile +ds_quantile_strings_quantile +ds_theta_estimate +ds_theta_exclude +ds_tuple_arrayofdouble_estimate +ds_tuple_doublesummary_estimate +ds_tuple_doublesummary_percentile e encode explode @@ -540,6 +613,69 @@ degrees dense_rank div +ds_cpc_estimate +ds_cpc_estimate_bounds +ds_cpc_sketch +ds_cpc_stringify +ds_cpc_union +ds_cpc_union_f +ds_freq_frequent_items +ds_freq_sketch +ds_freq_union +ds_hll_estimate +ds_hll_estimate_bounds +ds_hll_sketch +ds_hll_stringify +ds_hll_union +ds_hll_union_f +ds_kll_cdf +ds_kll_n +ds_kll_pmf +ds_kll_quantile +ds_kll_quantiles +ds_kll_rank +ds_kll_sketch +ds_kll_stringify +ds_kll_union +ds_quantile_doubles_cdf +ds_quantile_doubles_k +ds_quantile_doubles_n +ds_quantile_doubles_pmf +ds_quantile_doubles_quantile +ds_quantile_doubles_quantiles +ds_quantile_doubles_sketch +ds_quantile_doubles_stringify +ds_quantile_doubles_union +ds_quantile_strings_cdf +ds_quantile_strings_k +ds_quantile_strings_n +ds_quantile_strings_pmf +ds_quantile_strings_quantile +ds_quantile_strings_quantiles +ds_quantile_strings_sketch +ds_quantile_strings_stringify +ds_quantile_strings_union +ds_theta_estimate +ds_theta_exclude +ds_theta_intersect +ds_theta_intersect_f +ds_theta_sketch +ds_theta_union +ds_theta_union_f +ds_tuple_arrayofdouble_estimate +ds_tuple_arrayofdouble_estimate_bounds +ds_tuple_arrayofdouble_means +ds_tuple_arrayofdouble_n_retained +ds_tuple_arrayofdouble_quantiles_sketch +ds_tuple_arrayofdouble_sketch +ds_tuple_arrayofdouble_ttest +ds_tuple_arrayofdouble_union +ds_tuple_arrayofdouble_values +ds_tuple_arrayofdouble_variances +ds_tuple_doublesummary_estimate +ds_tuple_doublesummary_percentile +ds_tuple_doublesummary_sketch +ds_tuple_doublesummary_union e elt encode