diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 3510016..94afef5 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -820,6 +820,7 @@ schq_ingest.q,\ sketches_hll.q,\ sketches_theta.q,\ + sketches_materialized_view_rollup.q,\ table_access_keys_stats.q,\ temp_table_llap_partitioned.q,\ tez_bmj_schema_evolution.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java index b9d265f..eec90c6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/DataSketchesFunctions.java @@ -18,15 +18,35 @@ package org.apache.hadoop.hive.ql.exec; +import java.util.ArrayList; +import java.util.Collection; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import org.apache.calcite.rel.type.RelDataTypeImpl; +import org.apache.calcite.rel.type.RelProtoDataType; +import org.apache.calcite.sql.SqlFunction; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.InferTypes; +import org.apache.calcite.sql.type.OperandTypes; +import org.apache.calcite.sql.type.ReturnTypes; +import org.apache.calcite.sql.type.SqlTypeName; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate; import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFResolver2; import org.apache.hadoop.hive.ql.udf.generic.GenericUDTF; +import org.apache.hive.plugin.api.HiveUDFPlugin; /** * Registers functions from the DataSketches library as builtin functions. 
* * In an effort to show a more consistent */ -public class DataSketchesFunctions { +public final class DataSketchesFunctions implements HiveUDFPlugin { + + public static final DataSketchesFunctions INSTANCE = new DataSketchesFunctions(); + + private static final String DATASKETCHES_PREFIX = "ds"; private static final String DATA_TO_SKETCH = "sketch"; private static final String SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS = "estimate_bounds"; @@ -53,169 +73,276 @@ private static final String SKETCH_TO_VARIANCES = "variances"; private static final String SKETCH_TO_PERCENTILE = "percentile"; - private final Registry system; + private final List sketchClasses; + private final ArrayList descriptors; - public DataSketchesFunctions(Registry system) { - this.system = system; + private DataSketchesFunctions() { + this.sketchClasses = new ArrayList(); + this.descriptors = new ArrayList(); + registerHll(); + registerCpc(); + registerKll(); + registerTheta(); + registerTuple(); + registerQuantiles(); + registerFrequencies(); + + buildCalciteFns(); + buildDescritors(); } - public static void register(Registry system) { - DataSketchesFunctions dsf = new DataSketchesFunctions(system); - String prefix = "ds"; - dsf.registerHll(prefix); - dsf.registerCpc(prefix); - dsf.registerKll(prefix); - dsf.registerTheta(prefix); - dsf.registerTuple(prefix); - dsf.registerQuantiles(prefix); - dsf.registerFrequencies(prefix); + @Override + public Iterable getDescriptors() { + return descriptors; } - private void registerHll(String prefix) { - String p = prefix + "_hll_"; - registerUDAF(org.apache.datasketches.hive.hll.DataToSketchUDAF.class, p + DATA_TO_SKETCH); - registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class, - p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); - registerUDF(org.apache.datasketches.hive.hll.SketchToEstimateUDF.class, p + SKETCH_TO_ESTIMATE); - registerUDF(org.apache.datasketches.hive.hll.SketchToStringUDF.class, p + SKETCH_TO_STRING); - 
registerUDF(org.apache.datasketches.hive.hll.UnionSketchUDF.class, p + UNION_SKETCH1); - registerUDAF(org.apache.datasketches.hive.hll.UnionSketchUDAF.class, p + UNION_SKETCH); - } - - private void registerCpc(String prefix) { - String p = prefix + "_cpc_"; - registerUDAF(org.apache.datasketches.hive.cpc.DataToSketchUDAF.class, p + DATA_TO_SKETCH); - // FIXME: normalize GetEstimateAndErrorBoundsUDF vs SketchToEstimateAndErrorBoundsUDF - registerUDF(org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class, - p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); - // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF - registerUDF(org.apache.datasketches.hive.cpc.GetEstimateUDF.class, p + SKETCH_TO_ESTIMATE); - registerUDF(org.apache.datasketches.hive.cpc.SketchToStringUDF.class, p + SKETCH_TO_STRING); - registerUDF(org.apache.datasketches.hive.cpc.UnionSketchUDF.class, p + UNION_SKETCH1); - registerUDAF(org.apache.datasketches.hive.cpc.UnionSketchUDAF.class, p + UNION_SKETCH); - } - - private void registerKll(String prefix) { - String p = prefix + "_kll_"; - registerUDAF(org.apache.datasketches.hive.kll.DataToSketchUDAF.class, p + DATA_TO_SKETCH); - registerUDF(org.apache.datasketches.hive.kll.SketchToStringUDF.class, p + SKETCH_TO_STRING); - // registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p + UNION_SKETCH); - registerUDAF(org.apache.datasketches.hive.kll.UnionSketchUDAF.class, p + UNION_SKETCH); - - registerUDF(org.apache.datasketches.hive.kll.GetNUDF.class, p + GET_N); - registerUDF(org.apache.datasketches.hive.kll.GetCdfUDF.class, p + GET_CDF); - registerUDF(org.apache.datasketches.hive.kll.GetPmfUDF.class, p + GET_PMF); - registerUDF(org.apache.datasketches.hive.kll.GetQuantilesUDF.class, p + GET_QUANTILES); - registerUDF(org.apache.datasketches.hive.kll.GetQuantileUDF.class, p + GET_QUANTILE); - registerUDF(org.apache.datasketches.hive.kll.GetRankUDF.class, p + GET_RANK); - } - - private void registerTheta(String prefix) { - String p = 
prefix + "_theta_"; - registerUDAF(org.apache.datasketches.hive.theta.DataToSketchUDAF.class, p + DATA_TO_SKETCH); - // FIXME: missing? - //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); - registerUDF(org.apache.datasketches.hive.theta.UnionSketchUDF.class, p + UNION_SKETCH1); - registerUDAF(org.apache.datasketches.hive.theta.UnionSketchUDAF.class, p + UNION_SKETCH); - registerUDF(org.apache.datasketches.hive.theta.IntersectSketchUDF.class, p + INTERSECT_SKETCH1); - registerUDAF(org.apache.datasketches.hive.theta.IntersectSketchUDAF.class, p + INTERSECT_SKETCH); - registerUDF(org.apache.datasketches.hive.theta.EstimateSketchUDF.class, p + SKETCH_TO_ESTIMATE); - registerUDF(org.apache.datasketches.hive.theta.ExcludeSketchUDF.class, p + EXCLUDE_SKETCH); - - } - - private void registerTuple(String prefix) { - registerTupleArrayOfDoubles(prefix + "_tuple_arrayofdouble"); - registerTupleDoubleSummary(prefix + "_tuple_doublesummary"); - } - - private void registerTupleArrayOfDoubles(String string) { - String p = string + "_"; - registerUDAF(org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class, p + DATA_TO_SKETCH); - // FIXME: missing? 
- //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); - registerUDAF(org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class, p + UNION_SKETCH); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class, p + T_TEST); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class, - p + SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class, p + SKETCH_TO_MEANS); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class, - p + SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class, - p + SKETCH_TO_QUANTILES_SKETCH); - registerUDTF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class, p + SKETCH_TO_VALUES); - registerUDF(org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class, p + SKETCH_TO_VARIANCES); - } - - private void registerTupleDoubleSummary(String string) { - String p = string + "_"; - registerUDAF(org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class, p + DATA_TO_SKETCH); - // FIXME: missing? 
- //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); - registerUDAF(org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class, p + UNION_SKETCH); - registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class, p + SKETCH_TO_ESTIMATE); - registerUDF(org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class, p + SKETCH_TO_PERCENTILE); - } - - private void registerQuantiles(String prefix) { - registerQuantilesString(prefix + "_quantile"); - registerQuantilesDoubles(prefix + "_quantile"); - } - - private void registerFrequencies(String prefix) { - String p = prefix + "_freq_"; - registerUDAF(org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH); - // FIXME: missing? - //registerUDF(org.apache.datasketches.hive.frequencies.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); - //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); - registerUDAF(org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class, p + UNION_SKETCH); - registerUDTF(org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class, - p + GET_FREQUENT_ITEMS); - } - - private void registerQuantilesString(String prefix) { - String p = prefix + "_strings_"; - registerUDAF(org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class, p + DATA_TO_SKETCH); - registerUDF(org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class, p + SKETCH_TO_STRING); - //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); - registerUDAF(org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class, p + UNION_SKETCH); - registerUDF(org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class, p + GET_N); - registerUDF(org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class, p + GET_K); - 
registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class, p + GET_CDF); - registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class, p + GET_PMF); - registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class, p + GET_QUANTILE); - registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class, p + GET_QUANTILES); - } - - private void registerQuantilesDoubles(String prefix) { - String p = prefix + "_doubles_"; - registerUDAF(org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class, p + DATA_TO_SKETCH); - registerUDF(org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); - //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); - registerUDAF(org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class, p + UNION_SKETCH); - registerUDF(org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class, p + GET_N); - registerUDF(org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class, p + GET_K); - registerUDF(org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class, p + GET_CDF); - registerUDF(org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class, p + GET_PMF); - registerUDF(org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class, p + GET_QUANTILE); - registerUDF(org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class, p + GET_QUANTILES); - } - - private void registerUDF(Class udfClass, String name) { - system.registerUDF(name, udfClass, false); - } - - private void registerUDAF(Class udafClass, String name) { - try { - system.registerGenericUDAF(name, udafClass.newInstance()); - } catch (InstantiationException | IllegalAccessException e) { - throw new RuntimeException("Unable to register: " + name, e); + private void buildDescritors() { + for (SketchDescriptor 
sketchDescriptor : sketchClasses) { + descriptors.addAll(sketchDescriptor.fnMap.values()); } } - private void registerUDTF(Class udtfClass, String name) { - system.registerGenericUDTF(name, udtfClass); + private void buildCalciteFns() { + for (SketchDescriptor sd : sketchClasses) { + // Mergability is exposed to Calcite; which enables to use it during rollup. + RelProtoDataType sketchType = RelDataTypeImpl.proto(SqlTypeName.BINARY, true); + + SketchFunctionDescriptor sketchSFD = sd.fnMap.get(DATA_TO_SKETCH); + SketchFunctionDescriptor unionSFD = sd.fnMap.get(UNION_SKETCH); + + if (sketchSFD == null || unionSFD == null) { + continue; + } + + HiveMergeableAggregate unionFn = new HiveMergeableAggregate(unionSFD.name, + SqlKind.OTHER_FUNCTION, + ReturnTypes.explicit(sketchType), + InferTypes.ANY_NULLABLE, + OperandTypes.family(), + null); + + HiveMergeableAggregate sketchFn = new HiveMergeableAggregate(sketchSFD.name, + SqlKind.OTHER_FUNCTION, + ReturnTypes.explicit(sketchType), + InferTypes.ANY_NULLABLE, + OperandTypes.family(), + unionFn); + + unionSFD.setCalciteFunction(unionFn); + sketchSFD.setCalciteFunction(sketchFn); + } + } + + + private void registerHiveFunctionsInternal(Registry system) { + for (SketchDescriptor sketchDescriptor : sketchClasses) { + Collection functions = sketchDescriptor.fnMap.values(); + for (SketchFunctionDescriptor fn : functions) { + if (UDF.class.isAssignableFrom(fn.udfClass)) { + system.registerUDF(fn.name, (Class) fn.udfClass, false); + continue; + } + if (GenericUDAFResolver2.class.isAssignableFrom(fn.udfClass)) { + String name = fn.name; + try { + system.registerGenericUDAF(name, ((Class) fn.udfClass).newInstance()); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException("Unable to register: " + name, e); + } + continue; + } + if (GenericUDTF.class.isAssignableFrom(fn.udfClass)) { + system.registerGenericUDTF(fn.name, (Class) fn.udfClass); + continue; + } + throw new RuntimeException("Don't know 
how to register: " + fn.name); + } + } + + } + + private static class SketchFunctionDescriptor implements HiveUDFPlugin.UDFDescriptor { + String name; + Class udfClass; + private SqlFunction calciteFunction; + + public SketchFunctionDescriptor(String name, Class udfClass) { + this.name = name; + this.udfClass = udfClass; + } + + @Override + public Class getUDFClass() { + return udfClass; + } + + @Override + public String getFunctionName() { + return name; + } + + @Override + public Optional getCalciteFunction() { + return Optional.ofNullable(calciteFunction); + } + + public void setCalciteFunction(SqlFunction calciteFunction) { + this.calciteFunction = calciteFunction; + } + } + + private static class SketchDescriptor { + Map fnMap; + private String functionPrefix; + + public SketchDescriptor(String string) { + fnMap = new HashMap(); + functionPrefix = DATASKETCHES_PREFIX + "_" + string + "_"; + } + + private void register(String name, Class clazz) { + fnMap.put(name, new SketchFunctionDescriptor(functionPrefix + name, clazz)); + } + } + + private void registerHll() { + SketchDescriptor sd = new SketchDescriptor("hll"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.hll.DataToSketchUDAF.class); + sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS, + org.apache.datasketches.hive.hll.SketchToEstimateAndErrorBoundsUDF.class); + sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.hll.SketchToEstimateUDF.class); + sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.hll.SketchToStringUDF.class); + sd.register(UNION_SKETCH1, org.apache.datasketches.hive.hll.UnionSketchUDF.class); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.hll.UnionSketchUDAF.class); + sketchClasses.add(sd); + } + + private void registerCpc() { + SketchDescriptor sd = new SketchDescriptor("cpc"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.cpc.DataToSketchUDAF.class); + // FIXME: normalize GetEstimateAndErrorBoundsUDF vs 
SketchToEstimateAndErrorBoundsUDF + sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS, + org.apache.datasketches.hive.cpc.GetEstimateAndErrorBoundsUDF.class); + // FIXME: normalize GetEstimateUDF vs SketchToEstimateUDF + sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.cpc.GetEstimateUDF.class); + sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.cpc.SketchToStringUDF.class); + sd.register(UNION_SKETCH1, org.apache.datasketches.hive.cpc.UnionSketchUDF.class); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.cpc.UnionSketchUDAF.class); + sketchClasses.add(sd); + } + + private void registerKll() { + SketchDescriptor sd = new SketchDescriptor("kll"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.kll.DataToSketchUDAF.class); + sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.kll.SketchToStringUDF.class); + // registerUDF(org.apache.datasketches.hive.kll.UnionSketchUDF.class, p , UNION_SKETCH); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.kll.UnionSketchUDAF.class); + + sd.register(GET_N, org.apache.datasketches.hive.kll.GetNUDF.class); + sd.register(GET_CDF, org.apache.datasketches.hive.kll.GetCdfUDF.class); + sd.register(GET_PMF, org.apache.datasketches.hive.kll.GetPmfUDF.class); + sd.register(GET_QUANTILES, org.apache.datasketches.hive.kll.GetQuantilesUDF.class); + sd.register(GET_QUANTILE, org.apache.datasketches.hive.kll.GetQuantileUDF.class); + sd.register(GET_RANK, org.apache.datasketches.hive.kll.GetRankUDF.class); + sketchClasses.add(sd); + } + + private void registerTheta() { + SketchDescriptor sd = new SketchDescriptor("theta"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.theta.DataToSketchUDAF.class); + // FIXME: missing? 
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + sd.register(UNION_SKETCH1, org.apache.datasketches.hive.theta.UnionSketchUDF.class); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.theta.UnionSketchUDAF.class); + sd.register(INTERSECT_SKETCH1, org.apache.datasketches.hive.theta.IntersectSketchUDF.class); + sd.register(INTERSECT_SKETCH, org.apache.datasketches.hive.theta.IntersectSketchUDAF.class); + sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.theta.EstimateSketchUDF.class); + sd.register(EXCLUDE_SKETCH, org.apache.datasketches.hive.theta.ExcludeSketchUDF.class); + sketchClasses.add(sd); + + } + + private void registerTuple() { + registerTupleArrayOfDoubles(); + registerTupleDoubleSummary(); + } + + private void registerTupleArrayOfDoubles() { + SketchDescriptor sd = new SketchDescriptor("tuple_arrayofdouble"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.tuple.DataToArrayOfDoublesSketchUDAF.class); + // FIXME: missing? 
+ //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p , SKETCH_TO_STRING); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionArrayOfDoublesSketchUDAF.class); + sd.register(T_TEST, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchesTTestUDF.class); + sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimatesUDF.class); + sd.register(SKETCH_TO_ESTIMATE_WITH_ERROR_BOUNDS, + org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToEstimateAndErrorBoundsUDF.class); + sd.register(SKETCH_TO_MEANS, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToMeansUDF.class); + sd.register(SKETCH_TO_NUMBER_OF_RETAINED_ENTRIES, + org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToNumberOfRetainedEntriesUDF.class); + sd.register(SKETCH_TO_QUANTILES_SKETCH, + org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToQuantilesSketchUDF.class); + sd.register(SKETCH_TO_VALUES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToValuesUDTF.class); + sd.register(SKETCH_TO_VARIANCES, org.apache.datasketches.hive.tuple.ArrayOfDoublesSketchToVariancesUDF.class); + sketchClasses.add(sd); + } + + private void registerTupleDoubleSummary() { + SketchDescriptor sd = new SketchDescriptor("tuple_doublesummary"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.tuple.DataToDoubleSummarySketchUDAF.class); + //registerUDF(org.apache.datasketches.hive.theta.SketchToStringUDF.class, p + SKETCH_TO_STRING); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.tuple.UnionDoubleSummarySketchUDAF.class); + sd.register(SKETCH_TO_ESTIMATE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToEstimatesUDF.class); + sd.register(SKETCH_TO_PERCENTILE, org.apache.datasketches.hive.tuple.DoubleSummarySketchToPercentileUDF.class); + sketchClasses.add(sd); + } + + private void registerQuantiles() { + registerQuantilesString(); + registerQuantilesDoubles(); + } + + private void registerFrequencies() { + 
SketchDescriptor sd = new SketchDescriptor("freq"); + + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.frequencies.DataToStringsSketchUDAF.class); + // FIXME: missing? + //registerUDF(org.apache.datasketches.hive.frequencies.DoublesSketchToStringUDF.class, p + SKETCH_TO_STRING); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p + UNION_SKETCH); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.frequencies.UnionStringsSketchUDAF.class); + sd.register(GET_FREQUENT_ITEMS, + org.apache.datasketches.hive.frequencies.GetFrequentItemsFromStringsSketchUDTF.class); + sketchClasses.add(sd); + } + + private void registerQuantilesString() { + SketchDescriptor sd = new SketchDescriptor("quantile_strings"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.quantiles.DataToStringsSketchUDAF.class); + sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.quantiles.StringsSketchToStringUDF.class); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p , UNION_SKETCH); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.quantiles.UnionStringsSketchUDAF.class); + sd.register(GET_N, org.apache.datasketches.hive.quantiles.GetNFromStringsSketchUDF.class); + sd.register(GET_K, org.apache.datasketches.hive.quantiles.GetKFromStringsSketchUDF.class); + sd.register(GET_CDF, org.apache.datasketches.hive.quantiles.GetCdfFromStringsSketchUDF.class); + sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromStringsSketchUDF.class); + sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromStringsSketchUDF.class); + sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromStringsSketchUDF.class); + sketchClasses.add(sd); + } + + private void registerQuantilesDoubles() { + SketchDescriptor sd = new SketchDescriptor("quantile_doubles"); + sd.register(DATA_TO_SKETCH, org.apache.datasketches.hive.quantiles.DataToDoublesSketchUDAF.class); + 
sd.register(SKETCH_TO_STRING, org.apache.datasketches.hive.quantiles.DoublesSketchToStringUDF.class); + //registerUDF(org.apache.datasketches.hive.quantiles.UnionItemsSketchUDAF.class, p , UNION_SKETCH); + sd.register(UNION_SKETCH, org.apache.datasketches.hive.quantiles.UnionDoublesSketchUDAF.class); + sd.register(GET_N, org.apache.datasketches.hive.quantiles.GetNFromDoublesSketchUDF.class); + sd.register(GET_K, org.apache.datasketches.hive.quantiles.GetKFromDoublesSketchUDF.class); + sd.register(GET_CDF, org.apache.datasketches.hive.quantiles.GetCdfFromDoublesSketchUDF.class); + sd.register(GET_PMF, org.apache.datasketches.hive.quantiles.GetPmfFromDoublesSketchUDF.class); + sd.register(GET_QUANTILE, org.apache.datasketches.hive.quantiles.GetQuantileFromDoublesSketchUDF.class); + sd.register(GET_QUANTILES, org.apache.datasketches.hive.quantiles.GetQuantilesFromDoublesSketchUDF.class); + sketchClasses.add(sd); } } diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java index dc3781a..b0c5862 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java @@ -473,7 +473,7 @@ system.registerGenericUDAF("percentile_cont", new GenericUDAFPercentileCont()); system.registerGenericUDAF("percentile_disc", new GenericUDAFPercentileDisc()); - DataSketchesFunctions.register(system); + system.registerUDFPlugin(DataSketchesFunctions.INSTANCE); // Generic UDFs system.registerGenericUDF("reflect", GenericUDFReflect.class); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java index 76dd66e..40e9e97 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/Registry.java @@ -50,6 +50,8 @@ import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import 
org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hive.common.util.ReflectionUtil; +import org.apache.hive.plugin.api.HiveUDFPlugin; +import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor; import java.io.IOException; import java.util.Collections; @@ -812,4 +814,28 @@ } return null; } + + public void registerUDFPlugin(HiveUDFPlugin instance) { + Iterable x = instance.getDescriptors(); + for (UDFDescriptor fn : x) { + if (UDF.class.isAssignableFrom(fn.getUDFClass())) { + registerUDF(fn.getFunctionName(), (Class) fn.getUDFClass(), false); + continue; + } + if (GenericUDAFResolver2.class.isAssignableFrom(fn.getUDFClass())) { + String name = fn.getFunctionName(); + try { + registerGenericUDAF(name, ((Class) fn.getUDFClass()).newInstance()); + } catch (InstantiationException | IllegalAccessException e) { + throw new RuntimeException("Unable to register: " + name, e); + } + continue; + } + if (GenericUDTF.class.isAssignableFrom(fn.getUDFClass())) { + registerGenericUDTF(fn.getFunctionName(), (Class) fn.getUDFClass()); + continue; + } + throw new RuntimeException("Don't know how to register: " + fn.getFunctionName()); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java index f50779d..184a026 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/HiveRelBuilder.java @@ -35,6 +35,7 @@ import org.apache.calcite.tools.Frameworks; import org.apache.calcite.tools.RelBuilder; import org.apache.calcite.tools.RelBuilderFactory; +import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveMergeableAggregate; import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlCountAggFunction; import org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlMinMaxAggFunction; import 
org.apache.hadoop.hive.ql.optimizer.calcite.functions.HiveSqlSumAggFunction; @@ -139,6 +140,10 @@ } public static SqlAggFunction getRollup(SqlAggFunction aggregation) { + if (aggregation instanceof HiveMergeableAggregate) { + HiveMergeableAggregate mAgg = (HiveMergeableAggregate) aggregation; + return mAgg.getMergeAggFunction(); + } if (aggregation instanceof HiveSqlSumAggFunction || aggregation instanceof HiveSqlMinMaxAggFunction || aggregation instanceof HiveSqlSumEmptyIsZeroAggFunction) { diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java new file mode 100644 index 0000000..041345a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveMergeableAggregate.java @@ -0,0 +1,66 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.apache.hadoop.hive.ql.optimizer.calcite.functions; + +import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunctionCategory; +import org.apache.calcite.sql.SqlKind; +import org.apache.calcite.sql.type.SqlOperandTypeChecker; +import org.apache.calcite.sql.type.SqlOperandTypeInference; +import org.apache.calcite.sql.type.SqlReturnTypeInference; + +/** + * Mergeable aggregate. + * + * A mergeable aggregate is: + * - accepts the same kind as inputs as the output (an X^n -> X function) + * + * Example: the SUM function is a great example; since SUM of SUM -s is the overall sum. + */ +public class HiveMergeableAggregate extends SqlAggFunction { + + private SqlAggFunction mergeAgg; + + public HiveMergeableAggregate(String string, SqlKind kind, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, + SqlOperandTypeChecker operandTypeChecker) { + this(string, kind, returnTypeInference, operandTypeInference, operandTypeChecker, null); + } + + public HiveMergeableAggregate(String string, SqlKind kind, SqlReturnTypeInference returnTypeInference, + SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker, + SqlAggFunction unionFn) { + super( + string, kind, + returnTypeInference, + operandTypeInference, + operandTypeChecker, + SqlFunctionCategory.NUMERIC); + if (unionFn == null) { + this.mergeAgg = this; + } else { + this.mergeAgg = unionFn; + } + + } + + public SqlAggFunction getMergeAggFunction() { + return mergeAgg; + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java index 468e6f8..974dab1 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java @@ -125,5 +125,3 @@ } } } - 
-// End SqlSumAggFunction.java diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index a555749..07ca87f 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -20,9 +20,10 @@ import java.lang.annotation.Annotation; import java.util.List; import java.util.Map; - +import java.util.Optional; import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.sql.SqlAggFunction; +import org.apache.calcite.sql.SqlFunction; import org.apache.calcite.sql.SqlFunctionCategory; import org.apache.calcite.sql.SqlKind; import org.apache.calcite.sql.SqlOperator; @@ -37,6 +38,7 @@ import org.apache.calcite.sql.type.SqlTypeFamily; import org.apache.calcite.util.Util; import org.apache.commons.lang3.StringUtils; +import org.apache.hadoop.hive.ql.exec.DataSketchesFunctions; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.FunctionInfo; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -78,6 +80,8 @@ import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; import org.apache.hadoop.hive.serde2.typeinfo.VarcharTypeInfo; +import org.apache.hive.plugin.api.HiveUDFPlugin; +import org.apache.hive.plugin.api.HiveUDFPlugin.UDFDescriptor; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -468,6 +472,19 @@ ); registerFunction("date_add", HiveDateAddSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_add")); registerFunction("date_sub", HiveDateSubSqlOperator.INSTANCE, hToken(HiveParser.Identifier, "date_sub")); + + registerPlugin(DataSketchesFunctions.INSTANCE); + } + + private void registerPlugin(HiveUDFPlugin plugin) { + for (UDFDescriptor udfDesc : 
plugin.getDescriptors()) {
+      Optional<SqlFunction> calciteFunction = udfDesc.getCalciteFunction();
+      if (calciteFunction.isPresent()) {
+        registerDuplicateFunction(udfDesc.getFunctionName(), calciteFunction.get(),
+            hToken(HiveParser.Identifier, udfDesc.getFunctionName()));
+      }
+    }
+  }
 
   private void registerFunction(String name, SqlOperator calciteFn, HiveToken hiveToken) {
@@ -525,7 +542,7 @@
   }
 
   private static CalciteUDFInfo getUDFInfo(String hiveUdfName,
-      ImmutableList<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
+      List<RelDataType> calciteArgTypes, RelDataType calciteRetType) {
     CalciteUDFInfo udfInfo = new CalciteUDFInfo();
     udfInfo.udfName = hiveUdfName;
     udfInfo.returnTypeInference = ReturnTypes.explicit(calciteRetType);
diff --git ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java
new file mode 100644
index 0000000..41c198c
--- /dev/null
+++ ql/src/java/org/apache/hive/plugin/api/HiveUDFPlugin.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ + +package org.apache.hive.plugin.api; + +import java.util.Optional; + +import org.apache.calcite.sql.SqlFunction; + +public interface HiveUDFPlugin { + + public interface UDFDescriptor { + Class getUDFClass(); + String getFunctionName(); + Optional getCalciteFunction(); + } + + Iterable getDescriptors(); + +} diff --git ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q new file mode 100644 index 0000000..ab17a61 --- /dev/null +++ ql/src/test/queries/clientpositive/sketches_materialized_view_rollup.q @@ -0,0 +1,30 @@ + +set hive.support.concurrency=true; +set hive.txn.manager=org.apache.hadoop.hive.ql.lockmgr.DbTxnManager; +set hive.strict.checks.cartesian.product=false; +set hive.stats.fetch.column.stats=true; +set hive.materializedview.rewriting=true; +set hive.fetch.task.conversion=none; + +create table sketch_input (id int, category char(1)) +STORED AS ORC +TBLPROPERTIES ('transactional'='true'); + +insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +; + +-- create an mv for the intermediate results +create materialized view mv_1 as + select category, ds_hll_sketch(id),count(id) from sketch_input group by category; + +-- see if we use the mv +explain +select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category; +select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category; + +-- the mv should be used - the rollup should be possible +explain +select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input; +select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input; diff --git ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out 
ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out new file mode 100644 index 0000000..26a6761 --- /dev/null +++ ql/src/test/results/clientpositive/llap/sketches_materialized_view_rollup.q.out @@ -0,0 +1,179 @@ +PREHOOK: query: create table sketch_input (id int, category char(1)) +STORED AS ORC +TBLPROPERTIES ('transactional'='true') +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@sketch_input +POSTHOOK: query: create table sketch_input (id int, category char(1)) +STORED AS ORC +TBLPROPERTIES ('transactional'='true') +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@sketch_input +PREHOOK: query: insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@sketch_input +POSTHOOK: query: insert into table sketch_input values + (1,'a'),(1, 'a'), (2, 'a'), (3, 'a'), (4, 'a'), (5, 'a'), (6, 'a'), (7, 'a'), (8, 'a'), (9, 'a'), (10, 'a'), + (6,'b'),(6, 'b'), (7, 'b'), (8, 'b'), (9, 'b'), (10, 'b'), (11, 'b'), (12, 'b'), (13, 'b'), (14, 'b'), (15, 'b') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@sketch_input +POSTHOOK: Lineage: sketch_input.category SCRIPT [] +POSTHOOK: Lineage: sketch_input.id SCRIPT [] +PREHOOK: query: create materialized view mv_1 as + select category, ds_hll_sketch(id),count(id) from sketch_input group by category +PREHOOK: type: CREATE_MATERIALIZED_VIEW +PREHOOK: Input: default@sketch_input +PREHOOK: Output: database:default +PREHOOK: Output: default@mv_1 +POSTHOOK: query: create materialized view mv_1 as + select category, ds_hll_sketch(id),count(id) from sketch_input group by category +POSTHOOK: 
type: CREATE_MATERIALIZED_VIEW +POSTHOOK: Input: default@sketch_input +POSTHOOK: Output: database:default +POSTHOOK: Output: default@mv_1 +PREHOOK: query: explain +select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_1 +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: explain +select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_1 +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv_1 + Statistics: Num rows: 2 Data size: 362 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: category (type: char(1)), round(ds_hll_estimate(_c1)) (type: double) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 186 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: all inputs + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by category +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_1 +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: select category, round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input group by 
category +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_1 +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +a 10.0 +b 10.0 +PREHOOK: query: explain +select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_1 +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: explain +select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_1 +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Edges: + Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: default.mv_1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: _c1 (type: binary) + outputColumnNames: _c1 + Statistics: Num rows: 2 Data size: 192 Basic stats: COMPLETE Column stats: COMPLETE + Group By Operator + aggregations: ds_hll_union(_c1) + minReductionHashAggr: 0.5 + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + Reduce Output Operator + null sort order: + sort order: + Statistics: Num rows: 1 Data size: 388 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct) + Execution mode: llap + LLAP IO: all inputs + Reducer 2 + Execution mode: llap + Reduce Operator Tree: + Group By Operator + aggregations: ds_hll_union(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 144 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: round(ds_hll_estimate(_col0)) (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 
Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input +PREHOOK: type: QUERY +PREHOOK: Input: default@mv_1 +PREHOOK: Input: default@sketch_input +#### A masked pattern was here #### +POSTHOOK: query: select round(ds_hll_estimate(ds_hll_sketch(id))) from sketch_input +POSTHOOK: type: QUERY +POSTHOOK: Input: default@mv_1 +POSTHOOK: Input: default@sketch_input +#### A masked pattern was here #### +15.0