diff --git a/data/files/windowing_distinct.txt b/data/files/windowing_distinct.txt new file mode 100644 index 0000000..9271202 --- /dev/null +++ b/data/files/windowing_distinct.txt @@ -0,0 +1,6 @@ +1|19|442|65553|4294967380|26.43|37.77|true|alice zipper|2013-03-01 09:11:58.703217|29.62|^Ahistory^B +2|124|336|65664|4294967435|74.72|42.47|true|bob davidson|2013-03-01 09:11:58.703302|45.40|^Ayard duty^B +1|19|442|65553|4294967380|26.43|37.77|true|alice zipper|2013-03-01 09:11:58.703217|29.62|^Ahistory^B +1|35|387|65619|4294967459|96.91|18.86|false|katie davidson|2013-03-01 09:11:58.703079|27.32|^Ahistory^B +2|111|372|65656|4294967312|13.01|34.95|false|xavier quirinius|2013-03-01 09:11:58.703310|23.91|^Atopology^B +2|124|336|65664|4294967435|74.72|42.47|true|bob davidson|2013-03-01 09:11:58.703302|45.40|^Ayard duty^B diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java index 7937040..58191e5 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlCountAggFunction.java @@ -32,11 +32,12 @@ public class HiveSqlCountAggFunction extends SqlAggFunction { + final boolean isDistinct; final SqlReturnTypeInference returnTypeInference; final SqlOperandTypeInference operandTypeInference; final SqlOperandTypeChecker operandTypeChecker; - public HiveSqlCountAggFunction(SqlReturnTypeInference returnTypeInference, + public HiveSqlCountAggFunction(boolean isDistinct, SqlReturnTypeInference returnTypeInference, SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) { super( "count", @@ -45,11 +46,16 @@ public HiveSqlCountAggFunction(SqlReturnTypeInference returnTypeInference, operandTypeInference, operandTypeChecker, SqlFunctionCategory.NUMERIC); + this.isDistinct = 
isDistinct; this.returnTypeInference = returnTypeInference; this.operandTypeChecker = operandTypeChecker; this.operandTypeInference = operandTypeInference; } + public boolean isDistinct() { + return isDistinct; + } + @Override public T unwrap(Class clazz) { if (clazz == SqlSplittableAggFunction.class) { @@ -64,7 +70,7 @@ public HiveSqlCountAggFunction(SqlReturnTypeInference returnTypeInference, public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) { return AggregateCall.create( - new HiveSqlCountAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker), + new HiveSqlCountAggFunction(isDistinct, returnTypeInference, operandTypeInference, operandTypeChecker), false, ImmutableIntList.of(), -1, typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true), "count"); } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java index 8f62970..056eaeb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/functions/HiveSqlSumAggFunction.java @@ -47,14 +47,14 @@ * is the same type. 
*/ public class HiveSqlSumAggFunction extends SqlAggFunction { - + final boolean isDistinct; final SqlReturnTypeInference returnTypeInference; final SqlOperandTypeInference operandTypeInference; final SqlOperandTypeChecker operandTypeChecker; //~ Constructors ----------------------------------------------------------- - public HiveSqlSumAggFunction(SqlReturnTypeInference returnTypeInference, + public HiveSqlSumAggFunction(boolean isDistinct, SqlReturnTypeInference returnTypeInference, SqlOperandTypeInference operandTypeInference, SqlOperandTypeChecker operandTypeChecker) { super( "sum", @@ -66,6 +66,7 @@ public HiveSqlSumAggFunction(SqlReturnTypeInference returnTypeInference, this.returnTypeInference = returnTypeInference; this.operandTypeChecker = operandTypeChecker; this.operandTypeInference = operandTypeInference; + this.isDistinct = isDistinct; } //~ Methods ---------------------------------------------------------------- @@ -85,7 +86,7 @@ public HiveSqlSumAggFunction(SqlReturnTypeInference returnTypeInference, public AggregateCall other(RelDataTypeFactory typeFactory, AggregateCall e) { RelDataType countRetType = typeFactory.createTypeWithNullability(typeFactory.createSqlType(SqlTypeName.BIGINT), true); return AggregateCall.create( - new HiveSqlCountAggFunction(ReturnTypes.explicit(countRetType), operandTypeInference, operandTypeChecker), + new HiveSqlCountAggFunction(isDistinct, ReturnTypes.explicit(countRetType), operandTypeInference, operandTypeChecker), false, ImmutableIntList.of(), -1, countRetType, "count"); } @@ -116,7 +117,7 @@ public AggregateCall topSplit(RexBuilder rexBuilder, throw new AssertionError("unexpected count " + merges); } int ordinal = extra.register(node); - return AggregateCall.create(new HiveSqlSumAggFunction(returnTypeInference, operandTypeInference, operandTypeChecker), + return AggregateCall.create(new HiveSqlSumAggFunction(isDistinct, returnTypeInference, operandTypeInference, operandTypeChecker), false, ImmutableList.of(ordinal), 
-1, aggregateCall.type, aggregateCall.name); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java index e2fbb4f..1a543fb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/PlanModifierForASTConv.java @@ -379,7 +379,7 @@ private static void replaceEmptyGroupAggr(final RelNode rel, RelNode parent) { RelDataType longType = TypeConverter.convert(TypeInfoFactory.longTypeInfo, typeFactory); RelDataType intType = TypeConverter.convert(TypeInfoFactory.intTypeInfo, typeFactory); // Create the dummy aggregation. - SqlAggFunction countFn = SqlFunctionConverter.getCalciteAggFn("count", + SqlAggFunction countFn = SqlFunctionConverter.getCalciteAggFn("count", false, ImmutableList.of(intType), longType); // TODO: Using 0 might be wrong; might need to walk down to find the // proper index of a dummy. 
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java index 37249f9..75c38fa 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/SqlFunctionConverter.java @@ -217,10 +217,18 @@ public static ASTNode buildAST(SqlOperator op, List children) { } else if (op.kind == SqlKind.PLUS_PREFIX) { node = (ASTNode) ParseDriver.adaptor.create(HiveParser.PLUS, "PLUS"); } else { - if (op.getName().toUpperCase().equals(SqlStdOperatorTable.COUNT.getName()) - && children.size() == 0) { - node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR, + // Handle 'COUNT' function for the case of COUNT(*) and COUNT(DISTINCT) + if (op instanceof HiveSqlCountAggFunction) { + if (children.size() == 0) { + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONSTAR, "TOK_FUNCTIONSTAR"); + } else { + HiveSqlCountAggFunction countFunction = (HiveSqlCountAggFunction)op; + if (countFunction.isDistinct()) { + node = (ASTNode) ParseDriver.adaptor.create(HiveParser.TOK_FUNCTIONDI, + "TOK_FUNCTIONDI"); + } + } } node.addChild((ASTNode) ParseDriver.adaptor.create(HiveParser.Identifier, op.getName())); } @@ -416,33 +424,46 @@ public static SqlOperator getCalciteFn(String hiveUdfName, return calciteOp; } - public static SqlAggFunction getCalciteAggFn(String hiveUdfName, + public static SqlAggFunction getCalciteAggFn(String hiveUdfName, boolean isDistinct, ImmutableList calciteArgTypes, RelDataType calciteRetType) { SqlAggFunction calciteAggFn = (SqlAggFunction) hiveToCalcite.get(hiveUdfName); if (calciteAggFn == null) { - CalciteUDFInfo uInf = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); + CalciteUDFInfo udfInfo = getUDFInfo(hiveUdfName, calciteArgTypes, calciteRetType); switch 
(hiveUdfName.toLowerCase()) { case "sum": - calciteAggFn = new HiveSqlSumAggFunction(uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker); + calciteAggFn = new HiveSqlSumAggFunction( + isDistinct, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); break; case "count": - calciteAggFn = new HiveSqlCountAggFunction(uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker); + calciteAggFn = new HiveSqlCountAggFunction( + isDistinct, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); break; case "min": - calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker, true); + calciteAggFn = new HiveSqlMinMaxAggFunction( + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker, true); break; case "max": - calciteAggFn = new HiveSqlMinMaxAggFunction(uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker, false); + calciteAggFn = new HiveSqlMinMaxAggFunction( + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker, false); break; default: - calciteAggFn = new CalciteUDAF(uInf.udfName, uInf.returnTypeInference, - uInf.operandTypeInference, uInf.operandTypeChecker); + calciteAggFn = new CalciteUDAF( + udfInfo.udfName, + udfInfo.returnTypeInference, + udfInfo.operandTypeInference, + udfInfo.operandTypeChecker); break; } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java index 1c44ade..5aab7a8 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java @@ -1950,7 +1950,7 @@ private AggregateCall convertGBAgg(AggInfo agg, RelNode input, List gbC // 3. 
Get Aggregation FN from Calcite given name, ret type and input arg // type - final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, + final SqlAggFunction aggregation = SqlFunctionConverter.getCalciteAggFn(agg.m_udfName, agg.m_distinct, aggArgRelDTBldr.build(), aggFnRetType); return new AggregateCall(aggregation, agg.m_distinct, argList, aggFnRetType, null); @@ -2646,7 +2646,7 @@ private int getWindowSpecIndx(ASTNode wndAST) { // 5. Get Calcite Agg Fn final SqlAggFunction calciteAggFn = SqlFunctionConverter.getCalciteAggFn( - hiveAggInfo.m_udfName, calciteAggFnArgsType, calciteAggFnRetType); + hiveAggInfo.m_udfName, hiveAggInfo.m_distinct, calciteAggFnArgsType, calciteAggFnRetType); // 6. Translate Window spec RowResolver inputRR = relToHiveRR.get(srcRel); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g index 15ca754..61bd10c 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/IdentifiersParser.g @@ -195,7 +195,7 @@ function RPAREN (KW_OVER ws=window_specification)? -> {$star != null}? ^(TOK_FUNCTIONSTAR functionName $ws?) -> {$dist == null}? ^(TOK_FUNCTION functionName (selectExpression+)? $ws?) - -> ^(TOK_FUNCTIONDI functionName (selectExpression+)?) + -> ^(TOK_FUNCTIONDI functionName (selectExpression+)? $ws?) 
; functionName diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java index 29b8510..a8980eb 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PTFInvocationSpec.java @@ -411,6 +411,18 @@ public void setExpressions(ArrayList columns) this.expressions = columns; } + /** + * Add order expressions from a list of expressions given as ASTNodes + * @param nodes + */ + public void addExpressions(ArrayList nodes) { + for (int i = 0; i < nodes.size(); i++) { + OrderExpression expr = new OrderExpression(); + expr.setExpression(nodes.get(i)); + addExpression(expr); + } + } + public void addExpression(OrderExpression c) { expressions = expressions == null ? new ArrayList() : expressions; @@ -500,7 +512,9 @@ public String toString() { Order order; - public OrderExpression() {} + public OrderExpression() { + order = Order.ASC; + } public OrderExpression(PartitionExpression peSpec) { diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java index a0251fb..7154152 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/SemanticAnalyzer.java @@ -11632,11 +11632,6 @@ private WindowFunctionSpec processWindowFunction(ASTNode node, ASTNode wsNode) break; } - if ( wfSpec.isDistinct() ) { - throw new SemanticException(generateErrorMessage(node, - "Count/Sum distinct not supported with Windowing")); - } - wfSpec.setExpression(node); ASTNode nameNode = (ASTNode) node.getChild(0); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java index a181f7c..1bfe8d9 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java +++ 
b/ql/src/java/org/apache/hadoop/hive/ql/parse/WindowingSpec.java @@ -122,6 +122,9 @@ public void validateAndMakeEffective() throws SemanticException { WindowFunctionSpec wFn = (WindowFunctionSpec) expr; WindowSpec wdwSpec = wFn.getWindowSpec(); + // 0. Precheck supported syntax + precheckSyntax(wFn, wdwSpec); + // 1. For Wdw Specs that refer to Window Defns, inherit missing components if ( wdwSpec != null ) { ArrayList sources = new ArrayList(); @@ -144,7 +147,15 @@ public void validateAndMakeEffective() throws SemanticException { validateWindowFrame(wdwSpec); // 5. Add the Partition expressions as the Order if there is no Order and validate Order spec. - setAndValidateOrderSpec(wdwSpec); + setAndValidateOrderSpec(wFn, wdwSpec); + } + } + + private void precheckSyntax(WindowFunctionSpec wFn, WindowSpec wdwSpec) throws SemanticException { + if (wdwSpec != null ) { + if (wFn.isDistinct && (wdwSpec.windowFrame != null || wdwSpec.getOrder() != null) ) { + throw new SemanticException("Function with DISTINCT cannot work with partition ORDER BY or windowing clause."); + } } } @@ -274,8 +285,8 @@ private void validateWindowFrame(WindowSpec wdwSpec) throws SemanticException { * @param wdwSpec * @throws SemanticException */ - private void setAndValidateOrderSpec(WindowSpec wdwSpec) throws SemanticException { - wdwSpec.ensureOrderSpec(); + private void setAndValidateOrderSpec(WindowFunctionSpec wFn, WindowSpec wdwSpec) throws SemanticException { + wdwSpec.ensureOrderSpec(wFn); WindowFrameSpec wFrame = wdwSpec.getWindowFrame(); OrderSpec order = wdwSpec.getOrder(); @@ -479,10 +490,13 @@ public void setOrder(OrderSpec orderSpec) { * Partition expressions when the OrderSpec is null; but for now we are setting up * an OrderSpec that copies the Partition expressions. 
*/ - protected void ensureOrderSpec() { + protected void ensureOrderSpec(WindowFunctionSpec wFn) throws SemanticException { if ( getOrder() == null ) { OrderSpec order = new OrderSpec(); order.prefixBy(getPartition()); + if (wFn.isDistinct) { + order.addExpressions(wFn.getArgs()); + } setOrder(order); } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java index eaf112e..ffffbba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFCount.java @@ -17,6 +17,7 @@ */ package org.apache.hadoop.hive.ql.udf.generic; +import org.apache.commons.lang.ArrayUtils; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.metadata.HiveException; @@ -27,6 +28,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; import org.apache.hadoop.io.LongWritable; +import org.apache.hadoop.io.Text; /** * This class implements the COUNT aggregation function as in SQL. 
@@ -67,8 +69,11 @@ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) assert !paramInfo.isAllColumns() : "* not supported in expression list"; } - return new GenericUDAFCountEvaluator().setCountAllColumns( - paramInfo.isAllColumns()); + GenericUDAFCountEvaluator countEvaluator = new GenericUDAFCountEvaluator(); + countEvaluator.setCountAllColumns(paramInfo.isAllColumns()); + countEvaluator.setCountDistinct(paramInfo.isDistinct()); + + return countEvaluator; } /** @@ -77,6 +82,7 @@ public GenericUDAFEvaluator getEvaluator(GenericUDAFParameterInfo paramInfo) */ public static class GenericUDAFCountEvaluator extends GenericUDAFEvaluator { private boolean countAllColumns = false; + private boolean countDistinct = false; private LongObjectInspector partialCountAggOI; private LongWritable result; @@ -91,14 +97,18 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) return PrimitiveObjectInspectorFactory.writableLongObjectInspector; } - private GenericUDAFCountEvaluator setCountAllColumns(boolean countAllCols) { + private void setCountAllColumns(boolean countAllCols) { countAllColumns = countAllCols; - return this; + } + + private void setCountDistinct(boolean countDistinct) { + this.countDistinct = countDistinct; } /** class for storing count value. */ @AggregationType(estimable = true) static class CountAgg extends AbstractAggregationBuffer { + Object[] prevColumns = null; // Column values from previous row. Used to compare with current row for the case of COUNT(DISTINCT manager). 
long value; @Override public int estimate() { return JavaDataModel.PRIMITIVES2; } @@ -113,6 +123,7 @@ public AggregationBuffer getNewAggregationBuffer() throws HiveException { @Override public void reset(AggregationBuffer agg) throws HiveException { + ((CountAgg) agg).prevColumns = null; ((CountAgg) agg).value = 0; } @@ -134,6 +145,31 @@ public void iterate(AggregationBuffer agg, Object[] parameters) break; } } + + // Skip the counting if the values are the same for COUNT(DISTINCT) case + if (countThisRow && countDistinct) { + Object[] prevColumns = ((CountAgg) agg).prevColumns; + if (prevColumns == null) { + ((CountAgg) agg).prevColumns = new Object[parameters.length]; + } else { + if (ArrayUtils.isEquals(prevColumns, parameters)) { + countThisRow = false; + } + } + // Note: we need to keep a copy of values from previous row. Objects + // of some types are reused during the read so make sure to make a copy + // if needed. + if (countThisRow) { + for (int i = 0; i < parameters.length; i++) { + if (parameters[i] instanceof Text) { + ((CountAgg) agg).prevColumns[i] = new Text(((Text)parameters[i]).copyBytes()); + } else { + ((CountAgg) agg).prevColumns[i] = parameters[i]; + } + } + } + } + if (countThisRow) { ((CountAgg) agg).value++; } diff --git a/ql/src/test/queries/clientpositive/windowing_distinct.q b/ql/src/test/queries/clientpositive/windowing_distinct.q new file mode 100644 index 0000000..19e9c44 --- /dev/null +++ b/ql/src/test/queries/clientpositive/windowing_distinct.q @@ -0,0 +1,28 @@ +drop table windowing_distinct; + +create table windowing_distinct( + index int, + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|'; + +load data local inpath '../../data/files/windowing_distinct.txt' into table windowing_distinct; + + +SELECT COUNT(DISTINCT t) OVER (PARTITION BY index), + COUNT(DISTINCT s) OVER (PARTITION BY index), + 
COUNT(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index), + COUNT(DISTINCT ts) OVER (PARTITION BY index), + COUNT(DISTINCT dec) OVER (PARTITION BY index), + COUNT(DISTINCT bin) OVER (PARTITION BY index) +FROM windowing_distinct; diff --git a/ql/src/test/results/clientpositive/windowing_distinct.q.out b/ql/src/test/results/clientpositive/windowing_distinct.q.out new file mode 100644 index 0000000..f5335f7 --- /dev/null +++ b/ql/src/test/results/clientpositive/windowing_distinct.q.out @@ -0,0 +1,74 @@ +PREHOOK: query: drop table windowing_distinct +PREHOOK: type: DROPTABLE +POSTHOOK: query: drop table windowing_distinct +POSTHOOK: type: DROPTABLE +PREHOOK: query: create table windowing_distinct( + index int, + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@windowing_distinct +POSTHOOK: query: create table windowing_distinct( + index int, + t tinyint, + si smallint, + i int, + b bigint, + f float, + d double, + bo boolean, + s string, + ts timestamp, + dec decimal, + bin binary) + row format delimited + fields terminated by '|' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@windowing_distinct +PREHOOK: query: load data local inpath '../../data/files/windowing_distinct.txt' into table windowing_distinct +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@windowing_distinct +POSTHOOK: query: load data local inpath '../../data/files/windowing_distinct.txt' into table windowing_distinct +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@windowing_distinct +PREHOOK: query: SELECT COUNT(DISTINCT t) OVER (PARTITION BY index), + COUNT(DISTINCT s) OVER (PARTITION BY index), + COUNT(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index), + 
COUNT(DISTINCT ts) OVER (PARTITION BY index), + COUNT(DISTINCT dec) OVER (PARTITION BY index), + COUNT(DISTINCT bin) OVER (PARTITION BY index) +FROM windowing_distinct +PREHOOK: type: QUERY +PREHOOK: Input: default@windowing_distinct +#### A masked pattern was here #### +POSTHOOK: query: SELECT COUNT(DISTINCT t) OVER (PARTITION BY index), + COUNT(DISTINCT s) OVER (PARTITION BY index), + COUNT(DISTINCT concat('Mr.', s)) OVER (PARTITION BY index), + COUNT(DISTINCT ts) OVER (PARTITION BY index), + COUNT(DISTINCT dec) OVER (PARTITION BY index), + COUNT(DISTINCT bin) OVER (PARTITION BY index) +FROM windowing_distinct +POSTHOOK: type: QUERY +POSTHOOK: Input: default@windowing_distinct +#### A masked pattern was here #### +2 2 2 2 2 1 +2 2 2 2 2 1 +2 2 2 2 2 1 +2 2 2 2 2 2 +2 2 2 2 2 2 +2 2 2 2 2 2