diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties
index 42bc5df..118e1f6 100644
--- itests/src/test/resources/testconfiguration.properties
+++ itests/src/test/resources/testconfiguration.properties
@@ -813,6 +813,8 @@
schq_materialized.q,\
schq_analyze.q,\
schq_ingest.q,\
+ sketches_hll.q,\
+ sketches_theta.q,\
table_access_keys_stats.q,\
temp_table_llap_partitioned.q,\
tez_bmj_schema_evolution.q,\
diff --git pom.xml pom.xml
index a005f35..165b526 100644
--- pom.xml
+++ pom.xml
@@ -225,6 +225,7 @@
2.2.4
1.2
2.0.1
+ 1.0.0-incubating
diff --git ql/pom.xml ql/pom.xml
index 8b0c02b..efad8b2 100644
--- ql/pom.xml
+++ ql/pom.xml
@@ -309,6 +309,11 @@
test
+ org.apache.datasketches
+ datasketches-hive
+ ${datasketches.version}
+
+
com.lmax
disruptor
${disruptor.version}
@@ -987,6 +992,7 @@
io.dropwizard.metrics:metrics-jvm
io.dropwizard.metrics:metrics-json
com.zaxxer:HikariCP
+ org.apache.datasketches:*
@@ -1014,6 +1020,10 @@
com.google.guava
org.apache.hive.com.google.guava
+
+ org.apache.datasketches
+ org.apache.hive.org.apache.datasketches
+
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
new file mode 100644
index 0000000..6522d9c
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java
@@ -0,0 +1,218 @@
+package org.apache.hadoop.hive.ql.exec;
+
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2;
+import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF;
+
+public class DataSketchesFunctions {
+
+ private static final String DATA_TO_SKETCH = "datatosketch";
+ private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "sketchToEstimateWithErrorBounds";
+ // FIXME: consider renaming this to simply "estimate" or "evaluate" - for the counting sketches the "sketchTo..." prefix doesn't add value
+ private static final String SKETCH_TO_ESTIMATE = "sketchToEstimate";
+ private static final String SKETCH_TO_STRING = "sketchToString";
+ // FIXME: probably simply use "union" instead of "unionSketch"?
+ private static final String UNION_SKETCH = "unionSketch";
+ private static final String GET_N = "getN";
+ private static final String GET_CDF = "getCdf";
+ private static final String GET_PMF = "getPmf";
+ private static final String GET_QUANTILES = "GetQuantiles";
+ private static final String GET_QUANTILE = "GetQuantile";
+ private static final String GET_RANK = "getRank";
+ private static final String INTERSECT_SKETCH = "intersection";
+ private static final String EXCLUDE_SKETCH = "exclude";
+ private static final String GET_K = "getK";
+ private static final String GET_FREQUENT_ITEMS = "getFrequentItems";
+ private static final String T_TEST = "TTest";
+ private static final String SKETCH_TO_MEANS = "sketchtomeans";
+ private static final String SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES = "sketchtonumberofretainedentries";
+ private static final String SKETCH_TO_QUANTILES_SKETCH = "sketchToQuantilesSketch";
+ private static final String SKETCH_TO_VALUES = "sketchToValues";
+ private static final String SKETCH_TO_VARIANCES = "sketchToVariances";
+ private static final String SKETCH_TO_PERCENTILE = "sketchToPercentile";
+ private static final String UNION_SKETCH1 = "unionSketch1";
+ private static final String INTERSECT_SKETCH1 = "intersect";
+
+ private final Registry system;
+
+ /**
+  * Creates a registrar bound to the given registry; every register* helper of this class
+  * adds its functions to that registry.
+  *
+  * @param system registry that will receive the DataSketches function registrations
+  */
+ public DataSketchesFunctions(Registry system) {
+ this.system = system;
+ }
+
+  /**
+   * Registers all DataSketches function families into the given registry.
+   *
+   * <p>Every family helper receives the same common prefix and derives its function names
+   * from it.
+   *
+   * @param system registry that receives the function registrations
+   */
+  public static void register(Registry system) {
+    // FIXME: decide what the common prefix should be: "approx", "ds", or something else?
+    final String functionPrefix = "ds";
+    final DataSketchesFunctions registrar = new DataSketchesFunctions(system);
+
+    registrar.registerHll(functionPrefix);
+    registrar.registerCpc(functionPrefix);
+    registrar.registerKll(functionPrefix);
+    registrar.registerTheta(functionPrefix);
+    registrar.registerTuple(functionPrefix);
+    registrar.registerQuantiles(functionPrefix);
+    registrar.registerFrequencies(functionPrefix);
+  }
+
+ private void registerHll(String prefix) {
+ String p = prefix + "_hll_";
+ registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+ registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class,
+ p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+ registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
+ registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+ registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1);
+ registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH);
+ }
+
+ private void registerCpc(String prefix) {
+ String p = prefix + "_cpc_";
+ registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+ // FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF
+ registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class,
+ p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+ // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF
+ registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE);
+ registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+ registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1);
+ registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH);
+ }
+
+ private void registerKll(String prefix) {
+ String p = prefix + "_kll_";
+ registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+ registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+ // registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p + UNION_SKETCH);
+ registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH);
+
+ registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N);
+ registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF);
+ registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF);
+ registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES);
+ registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE);
+ registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK);
+ }
+
+ private void registerTheta(String prefix) {
+ String p = prefix + "_theta_";
+ registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH);
+ // FIXME: missing?
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+ registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1);
+ registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH);
+ registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1);
+ registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH);
+ registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE);
+ registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH);
+
+ }
+
+ private void registerTuple(String prefix) {
+ registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble");
+ registerTupleDoubleSummary(prefix + "_tuple_doublesummary");
+ }
+
+ private void registerTupleArrayOfDoubles(String string) {
+ String p = string + "_";
+ registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
+ // FIXME: missing?
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+ registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class,
+ p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class,
+ p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class,
+ p + SKETCH_TO_QUANTILES_SKETCH);
+ registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES);
+ registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES);
+ }
+
+ private void registerTupleDoubleSummary(String string) {
+ String p = string + "_";
+ registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH);
+ // FIXME: missing?
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING);
+ registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH);
+ registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE);
+ registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE);
+ }
+
+ private void registerQuantiles(String prefix) {
+ registerQuantilesString(prefix + "_quantile");
+ registerQuantilesDoubles(prefix + "_quantile");
+ }
+
+  /**
+   * Registers the frequent-items (strings) sketch functions; all names start with
+   * {@code <prefix>_freq_}.
+   *
+   * <p>Every function of this family, including the frequent-items UDTF, is registered with
+   * the same family prefix. Without a family segment the names would be the bare
+   * {@code <prefix>_datatosketch} / {@code <prefix>_unionSketch}, which is inconsistent with
+   * all other families, and the UDTF name would lack the underscore separator.
+   *
+   * @param prefix common prefix shared by all DataSketches functions
+   */
+  private void registerFrequencies(String prefix) {
+    String p = prefix + "_freq_";
+    registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
+    // FIXME: SKETCH_TO_STRING is not registered for frequency sketches - missing?
+    registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
+    registerUDTF(
+        org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class,
+        p + GET_FREQUENT_ITEMS);
+  }
+
+ private void registerQuantilesString(String prefix) {
+ String p = prefix + "_strings_";
+ registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH);
+ registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING);
+ //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
+ registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES);
+ }
+
+ private void registerQuantilesDoubles(String prefix) {
+ String p = prefix + "_doubles_";
+ registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH);
+ registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING);
+ //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH);
+ registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE);
+ registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES);
+ }
+
+  /**
+   * Registers a UDF class in the registry under the given name.
+   *
+   * @param udfClass UDF implementation class
+   * @param name function name to register
+   */
+  private void registerUDF(Class<? extends UDF> udfClass, String name) {
+    // NOTE(review): the trailing `false` is presumably the "is operator" flag of
+    // Registry#registerUDF - confirm against the Registry signature.
+    system.registerUDF(name, udfClass, false);
+  }
+
+  /**
+   * Instantiates the given UDAF resolver class through its no-argument constructor and
+   * registers the instance in the registry under the given name.
+   *
+   * @param udafClass UDAF resolver class; must have an accessible no-argument constructor
+   * @param name function name to register
+   * @throws RuntimeException if the resolver cannot be instantiated; the reflective failure
+   *         is preserved as the cause
+   */
+  private void registerUDAF(Class<? extends GenericUDAFResolver2> udafClass, String name) {
+    try {
+      // Class#newInstance() is deprecated and rethrows constructor exceptions (even checked
+      // ones) undeclared; going through the constructor wraps them in
+      // InvocationTargetException, which is handled below.
+      system.registerGenericUDAF(name, udafClass.getDeclaredConstructor().newInstance());
+    } catch (ReflectiveOperationException e) {
+      throw new RuntimeException("Unable to register: " + name, e);
+    }
+  }
+
+  /**
+   * Registers a UDTF class in the registry under the given name.
+   *
+   * @param udtfClass UDTF implementation class
+   * @param name function name to register
+   */
+  private void registerUDTF(Class<? extends GenericUDTF> udtfClass, String name) {
+    system.registerGenericUDTF(name, udtfClass);
+  }
+
+  /**
+   * Returns the lower-cased function name declared by the {@link Description} annotation of
+   * the given class.
+   *
+   * @param clazz class carrying a {@link Description} annotation
+   * @return the lower-cased value of the annotation's {@code name()}
+   * @throws RuntimeException if the class has no {@link Description} annotation or the
+   *         annotation declares an empty name
+   */
+  private String getUDFName(Class<?> clazz) {
+    Description desc = getDescription(clazz);
+    String name = desc.name();
+    // Validate before lower-casing (so a null cannot be dereferenced first) and test the
+    // content with isEmpty(); comparing with == "" only compares references.
+    if (name == null || name.isEmpty()) {
+      throw new RuntimeException("The UDF class (" + clazz.getName() + ") doesn't have a valid name");
+    }
+    return name.toLowerCase();
+  }
+
+  /**
+   * Looks up the {@link Description} annotation of the given class.
+   *
+   * @param clazz class expected to carry a {@link Description} annotation
+   * @return the annotation instance, never {@code null}
+   * @throws RuntimeException if the class is not annotated with {@link Description}
+   */
+  private Description getDescription(Class<?> clazz) {
+    Description desc = clazz.getAnnotation(Description.class);
+    if (desc == null) {
+      throw new RuntimeException("no Description annotation on class: " + clazz.getName());
+    }
+    return desc;
+  }
+
+}
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index db5ee8d..dc3781a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -140,8 +140,6 @@
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils.PrimitiveGrouping;
import org.apache.hadoop.hive.serde2.typeinfo.HiveDecimalUtils;
-import org.apache.hadoop.hive.serde2.typeinfo.ListTypeInfo;
-import org.apache.hadoop.hive.serde2.typeinfo.MapTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
@@ -475,6 +473,7 @@
system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont());
system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc());
+ DataSketchesFunctions.register(system);
// Generic UDFs
system.registerGenericUDF("reflect", GenericUDFReflect.class);
diff --git ql/src/test/queries/clientpositive/sketches_hll.q ql/src/test/queries/clientpositive/sketches_hll.q
new file mode 100644
index 0000000..84408e9
--- /dev/null
+++ ql/src/test/queries/clientpositive/sketches_hll.q
@@ -0,0 +1,16 @@
+-- prepare input data: ids 1..10 in category a and ids 6..15 in category b
+-- (ids 6..10 appear in both categories, so there are 15 distinct ids overall)
+create temporary table sketch_input (id int, category char(1));
+insert into table sketch_input values
+ (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b');
+
+-- build one HLL sketch per category and store it as a binary column
+create temporary table sketch_intermediate (category char(1), sketch binary);
+insert into sketch_intermediate select category, ds_hll_dataToSketch(id) from sketch_input group by category;
+
+-- get unique count estimates per category (each category holds 10 distinct ids)
+select category, ds_hll_sketchToEstimate(sketch) from sketch_intermediate;
+
+
+-- union sketches across categories (aggregate) and get the overall unique count estimate
+select ds_hll_sketchToEstimate(ds_hll_unionSketch(sketch)) from sketch_intermediate;
diff --git ql/src/test/queries/clientpositive/sketches_theta.q ql/src/test/queries/clientpositive/sketches_theta.q
new file mode 100644
index 0000000..4c2124c
--- /dev/null
+++ ql/src/test/queries/clientpositive/sketches_theta.q
@@ -0,0 +1,33 @@
+-- for the theta sketch function reference see: https://datasketches.apache.org/docs/Theta/ThetaHiveUDFs.html
+
+-- scenario 1: one sketch per category
+-- prepare input data: ids 1..10 in category a and ids 6..15 in category b
+-- (ids 6..10 appear in both categories, so there are 15 distinct ids overall)
+create temporary table theta_input (id int, category char(1));
+insert into table theta_input values
+ (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b');
+
+-- build one theta sketch per category and store it as a binary column
+create temporary table sketch_intermediate (category char(1), sketch binary);
+insert into sketch_intermediate select category, ds_theta_datatosketch(id) from theta_input group by category;
+
+-- get unique count estimates per category
+select category, ds_theta_sketchtoestimate(sketch) from sketch_intermediate;
+
+-- union sketches across categories (aggregate) and get the overall unique count estimate
+select ds_theta_sketchtoestimate(ds_theta_unionSketch(sketch)) from sketch_intermediate;
+
+
+
+-- scenario 2: set operations on two sketches stored in the same row
+-- id1 holds 1..10 and id2 holds the even numbers 2..20 (the values 2, 4, 6, 8 and 10 are shared)
+create temporary table sketch_input (id1 int, id2 int);
+insert into table sketch_input values
+ (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20);
+
+create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary);
+
+-- build one sketch per column
+insert into sketch_intermediate2 select ds_theta_datatosketch(id1), ds_theta_datatosketch(id2) from sketch_input;
+
+-- estimates of both sketches, then of their union, intersection and both set differences,
+-- all computed with the two-argument (non-aggregate) functions
+select
+ ds_theta_sketchtoestimate(sketch1),
+ ds_theta_sketchtoestimate(sketch2),
+ ds_theta_sketchtoestimate(ds_theta_unionSketch1(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_intersect(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_exclude(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_exclude(sketch2, sketch1))
+from sketch_intermediate2;
+
diff --git ql/src/test/results/clientpositive/llap/sketches_hll.q.out ql/src/test/results/clientpositive/llap/sketches_hll.q.out
new file mode 100644
index 0000000..6512154
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/sketches_hll.q.out
@@ -0,0 +1,59 @@
+PREHOOK: query: create temporary table sketch_input (id int, category char(1))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create temporary table sketch_input (id int, category char(1))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+ (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+ (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.category SCRIPT []
+POSTHOOK: Lineage: sketch_input.id SCRIPT []
+PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_intermediate
+PREHOOK: query: insert into sketch_intermediate select category, ds_hll_dataToSketch(id) from sketch_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: insert into sketch_intermediate select category, ds_hll_dataToSketch(id) from sketch_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@sketch_intermediate
+POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(sketch_input)sketch_input.FieldSchema(name:category, type:char(1), comment:null), ]
+POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id, type:int, comment:null), ]
+PREHOOK: query: select category, ds_hll_sketchToEstimate(sketch) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select category, ds_hll_sketchToEstimate(sketch) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+a 10.000000223517425
+b 10.000000223517425
+PREHOOK: query: select ds_hll_sketchToEstimate(ds_hll_unionSketch(sketch)) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_hll_sketchToEstimate(ds_hll_unionSketch(sketch)) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+15.000000521540663
diff --git ql/src/test/results/clientpositive/llap/sketches_theta.q.out ql/src/test/results/clientpositive/llap/sketches_theta.q.out
new file mode 100644
index 0000000..7916e5d
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/sketches_theta.q.out
@@ -0,0 +1,120 @@
+PREHOOK: query: create temporary table theta_input (id int, category char(1))
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@theta_input
+POSTHOOK: query: create temporary table theta_input (id int, category char(1))
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@theta_input
+PREHOOK: query: insert into table theta_input values
+ (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@theta_input
+POSTHOOK: query: insert into table theta_input values
+ (1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'),
+ (6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@theta_input
+POSTHOOK: Lineage: theta_input.category SCRIPT []
+POSTHOOK: Lineage: theta_input.id SCRIPT []
+PREHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: create temporary table sketch_intermediate (category char(1), sketch binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_intermediate
+PREHOOK: query: insert into sketch_intermediate select category, ds_theta_datatosketch(id) from theta_input group by category
+PREHOOK: type: QUERY
+PREHOOK: Input: default@theta_input
+PREHOOK: Output: default@sketch_intermediate
+POSTHOOK: query: insert into sketch_intermediate select category, ds_theta_datatosketch(id) from theta_input group by category
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@theta_input
+POSTHOOK: Output: default@sketch_intermediate
+POSTHOOK: Lineage: sketch_intermediate.category SIMPLE [(theta_input)theta_input.FieldSchema(name:category, type:char(1), comment:null), ]
+POSTHOOK: Lineage: sketch_intermediate.sketch EXPRESSION [(theta_input)theta_input.FieldSchema(name:id, type:int, comment:null), ]
+PREHOOK: query: select category, ds_theta_sketchtoestimate(sketch) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select category, ds_theta_sketchtoestimate(sketch) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+a 10.0
+b 10.0
+PREHOOK: query: select ds_theta_sketchtoestimate(ds_theta_unionSketch(sketch)) from sketch_intermediate
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+POSTHOOK: query: select ds_theta_sketchtoestimate(ds_theta_unionSketch(sketch)) from sketch_intermediate
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate
+#### A masked pattern was here ####
+15.0
+PREHOOK: query: create temporary table sketch_input (id1 int, id2 int)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: create temporary table sketch_input (id1 int, id2 int)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_input
+PREHOOK: query: insert into table sketch_input values
+ (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@sketch_input
+POSTHOOK: query: insert into table sketch_input values
+ (1, 2), (2, 4), (3, 6), (4, 8), (5, 10), (6, 12), (7, 14), (8, 16), (9, 18), (10, 20)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@sketch_input
+POSTHOOK: Lineage: sketch_input.id1 SCRIPT []
+POSTHOOK: Lineage: sketch_input.id2 SCRIPT []
+PREHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@sketch_intermediate2
+POSTHOOK: query: create temporary table sketch_intermediate2 (sketch1 binary, sketch2 binary)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@sketch_intermediate2
+PREHOOK: query: insert into sketch_intermediate2 select ds_theta_datatosketch(id1), ds_theta_datatosketch(id2) from sketch_input
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_input
+PREHOOK: Output: default@sketch_intermediate2
+POSTHOOK: query: insert into sketch_intermediate2 select ds_theta_datatosketch(id1), ds_theta_datatosketch(id2) from sketch_input
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_input
+POSTHOOK: Output: default@sketch_intermediate2
+POSTHOOK: Lineage: sketch_intermediate2.sketch1 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id1, type:int, comment:null), ]
+POSTHOOK: Lineage: sketch_intermediate2.sketch2 EXPRESSION [(sketch_input)sketch_input.FieldSchema(name:id2, type:int, comment:null), ]
+PREHOOK: query: select
+ ds_theta_sketchtoestimate(sketch1),
+ ds_theta_sketchtoestimate(sketch2),
+ ds_theta_sketchtoestimate(ds_theta_unionSketch1(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_intersect(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_exclude(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_exclude(sketch2, sketch1))
+from sketch_intermediate2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@sketch_intermediate2
+#### A masked pattern was here ####
+POSTHOOK: query: select
+ ds_theta_sketchtoestimate(sketch1),
+ ds_theta_sketchtoestimate(sketch2),
+ ds_theta_sketchtoestimate(ds_theta_unionSketch1(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_intersect(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_exclude(sketch1, sketch2)),
+ ds_theta_sketchtoestimate(ds_theta_exclude(sketch2, sketch1))
+from sketch_intermediate2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@sketch_intermediate2
+#### A masked pattern was here ####
+10.0 10.0 15.0 5.0 5.0 5.0