diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
index ec1b0ed..bb18b32 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java
@@ -67,6 +67,7 @@
     DATE (0x040),
     TIMESTAMP (0x080),
     DATETIME_FAMILY (DATE.value | TIMESTAMP.value),
+    INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value),
     INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value),
     STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value),
     ALL_FAMILY (0xFFF);
diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
index acee377..4c3f29b 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java
@@ -1888,47 +1888,47 @@ static String getUndecoratedName(String hiveTypeName) {
   // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used. Right now they are conservatively
   // marked map-side (HASH).
   static ArrayList<AggregateDefinition> aggregatesDefinition = new ArrayList<AggregateDefinition>() {{
-    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMinLong.class));
-    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class));
-    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class));
-    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class));
-    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMaxLong.class));
-    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class));
-    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class));
-    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFSumLong.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
-    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
-    add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class));
-    add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class));
-    add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class));
-    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
-    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class));
-    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class));
-    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
-    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
-    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
-    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
-    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
-    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
-    add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
-    add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class));
-    add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class));
-    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
-    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
-    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
-    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
-    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
-    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
-    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
-    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
-    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
-    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
-    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class));
-    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
+    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMinLong.class));
+    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class));
+    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class));
+    add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class));
+    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMaxLong.class));
+    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class));
+    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class));
+    add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFSumLong.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+    add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class));
+    add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class));
+    add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class));
+    add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class));
+    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class));
+    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class));
+    add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class));
+    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class));
+    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
+    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class));
+    add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
+    add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class));
+    add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class));
+    add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class));
+    add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class));
+    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class));
+    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class));
+    add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+    add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+    add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class));
+    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class));
+    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class));
+    add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class));
   }};

   public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce)
diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
index e682dba..9e83d96 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java
@@ -791,6 +791,7 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException {
   boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, boolean isTez) {
     boolean ret = false;
+    LOG.info("Validating MapWork operator " + op.getType().name());
     switch (op.getType()) {
       case MAPJOIN:
         if (op instanceof MapJoinOperator) {
@@ -827,6 +828,7 @@ boolean validateMapWorkOperator(Operator<? extends OperatorDesc> op, boolean isT
   boolean validateReduceWorkOperator(Operator<? extends OperatorDesc> op) {
     boolean ret = false;
+    LOG.info("Validating ReduceWork operator " + op.getType().name());
     switch (op.getType()) {
       case EXTRACT:
         ret = validateExtractOperator((ExtractOperator) op);
@@ -1071,11 +1073,11 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode
       VectorizationContext vc = new ValidatorVectorizationContext();
       if (vc.getVectorExpression(desc, mode) == null) {
         // TODO: this cannot happen - VectorizationContext throws in such cases.
-        LOG.info("getVectorExpression returned null");
+        LOG.debug("getVectorExpression returned null");
         return false;
       }
     } catch (Exception e) {
-      LOG.info("Failed to vectorize", e);
+      LOG.debug("Failed to vectorize", e);
       return false;
     }
     return true;
@@ -1098,19 +1100,19 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc
     if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) {
       return false;
     }
-    if (aggDesc.getParameters() != null) {
-      return validateExprNodeDesc(aggDesc.getParameters());
+    if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) {
+      return false;
     }
     // See if we can vectorize the aggregation.
     try {
       VectorizationContext vc = new ValidatorVectorizationContext();
       if (vc.getAggregatorExpression(aggDesc, isReduce) == null) {
         // TODO: this cannot happen - VectorizationContext throws in such cases.
-        LOG.info("getAggregatorExpression returned null");
+        LOG.debug("getAggregatorExpression returned null");
         return false;
       }
     } catch (Exception e) {
-      LOG.info("Failed to vectorize", e);
+      LOG.debug("Failed to vectorize", e);
       return false;
     }
     return true;
diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q
index 6392fc9..99ed172 100644
--- ql/src/test/queries/clientpositive/vectorized_date_funcs.q
+++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q
@@ -122,4 +122,18 @@ SELECT
 FROM date_udf_flight_orc LIMIT 10;

 -- Test extracting the date part of expression that includes time
-SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
\ No newline at end of file
+SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1;
+
+EXPLAIN SELECT
+  min(fl_date) AS c1,
+  max(fl_date),
+  count(fl_date)
+FROM date_udf_flight_orc
+ORDER BY c1;
+
+SELECT
+  min(fl_date) AS c1,
+  max(fl_date),
+  count(fl_date)
+FROM date_udf_flight_orc
+ORDER BY c1;
\ No newline at end of file
diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
index 95eedd3..1e2f1a2 100644
--- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
+++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q
@@ -122,3 +122,44 @@ SELECT
   second(stimestamp1)
 FROM alltypesorc_wrong
 ORDER BY c1;
+
+EXPLAIN SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1;
+
+SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1;
+
+-- Currently, we do not expect these functions to vectorize but we do expect them to succeed-- see HIVE-8063.
+EXPLAIN SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1;
+
+SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
index b8e46e9..ce92d10 100644
--- ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out
@@ -604,3 +604,231 @@ POSTHOOK: Input: default@alltypesorc_wrong
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
+PREHOOK: query: EXPLAIN SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc_string
+                  Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctimestamp1 (type: timestamp)
+                    outputColumnNames: ctimestamp1
+                    Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2
+                      Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint)
+            Execution mode: vectorized
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint)
+                  outputColumnNames: _col0, _col1, _col2
+                  Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: timestamp)
+                    sort order: +
+                    Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: timestamp), _col2 (type: bigint)
+            Execution mode: vectorized
+        Reducer 3
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: bigint)
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+1969-12-31 23:59:46.674 1969-12-31 23:59:46.674 39
+PREHOOK: query: -- Currently, we do not expect these functions to vectorize but we do expect them to succeed-- see HIVE-8063.
+EXPLAIN SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Currently, we do not expect these functions to vectorize but we do expect them to succeed-- see HIVE-8063.
+EXPLAIN SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+      Edges:
+        Reducer 2 <- Map 1 (SIMPLE_EDGE)
+        Reducer 3 <- Reducer 2 (SIMPLE_EDGE)
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: alltypesorc_string
+                  Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: ctimestamp1 (type: timestamp)
+                    outputColumnNames: ctimestamp1
+                    Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+                    Group By Operator
+                      aggregations: sum(ctimestamp1), avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+                      mode: hash
+                      outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                      Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                      Reduce Output Operator
+                        sort order: 
+                        Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                        value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
+        Reducer 2
+            Reduce Operator Tree:
+              Group By Operator
+                aggregations: sum(VALUE._col0), avg(VALUE._col1), variance(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), std(VALUE._col5), stddev(VALUE._col6), stddev_pop(VALUE._col7), stddev_samp(VALUE._col8)
+                mode: mergepartial
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                Select Operator
+                  expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+                  outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                  Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                  Reduce Output Operator
+                    key expressions: _col0 (type: double)
+                    sort order: +
+                    Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                    value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+        Reducer 3
+            Reduce Operator Tree:
+              Select Operator
+                expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double)
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                File Output Operator
+                  compressed: false
+                  Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+                  table:
+                      input format: org.apache.hadoop.mapred.TextInputFormat
+                      output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+1122680.2860000003 28786.674000000006 4.7403166072049E-24 4.7403166072049E-24 4.865061781078713E-24 2.177226815746329E-12 2.177226815746329E-12 2.177226815746329E-12 2.2056885049976375E-12
diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
index ef30a0c..b690668 100644
--- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out
@@ -972,3 +972,107 @@ POSTHOOK: type: QUERY
 POSTHOOK: Input: default@date_udf_flight_orc
 #### A masked pattern was here ####
 2009-07-30
+PREHOOK: query: EXPLAIN SELECT
+  min(fl_date) AS c1,
+  max(fl_date),
+  count(fl_date)
+FROM date_udf_flight_orc
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT
+  min(fl_date) AS c1,
+  max(fl_date),
+  count(fl_date)
+FROM date_udf_flight_orc
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: date_udf_flight_orc
+            Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: fl_date (type: date)
+              outputColumnNames: fl_date
+              Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(fl_date), max(fl_date), count(fl_date)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: date)
+              sort order: +
+              Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: date), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+  min(fl_date) AS c1,
+  max(fl_date),
+  count(fl_date)
+FROM date_udf_flight_orc
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@date_udf_flight_orc
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  min(fl_date) AS c1,
+  max(fl_date),
+  count(fl_date)
+FROM date_udf_flight_orc
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@date_udf_flight_orc
+#### A masked pattern was here ####
+2010-10-20 2010-10-31 137
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 89ea70d..4a1847e 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -576,3 +576,236 @@ POSTHOOK: Input: default@alltypesorc_wrong
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
+PREHOOK: query: EXPLAIN SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc_string
+            Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctimestamp1 (type: timestamp)
+              outputColumnNames: ctimestamp1
+              Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint)
+            outputColumnNames: _col0, _col1, _col2
+            Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: timestamp)
+              sort order: +
+              Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: timestamp), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  min(ctimestamp1) AS c1,
+  max(ctimestamp1),
+  count(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+1969-12-31 23:59:44.088 1970-01-01 00:00:15.007 39
+PREHOOK: query: -- Currently, we do not expect these functions to vectorize but we do expect them to succeed-- see HIVE-8063.
+EXPLAIN SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+POSTHOOK: query: -- Currently, we do not expect these functions to vectorize but we do expect them to succeed-- see HIVE-8063.
+EXPLAIN SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: alltypesorc_string
+            Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctimestamp1 (type: timestamp)
+              outputColumnNames: ctimestamp1
+              Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(ctimestamp1), avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+                Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  sort order: 
+                  Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: double), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), avg(VALUE._col1), variance(VALUE._col2), var_pop(VALUE._col3), var_samp(VALUE._col4), std(VALUE._col5), stddev(VALUE._col6), stddev_pop(VALUE._col7), stddev_samp(VALUE._col8)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+          Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+            Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: double)
+              sort order: +
+              Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: double)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
+          Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.TextInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
+  sum(ctimestamp1) as c1,
+  avg(ctimestamp1),
+  variance(ctimestamp1),
+  var_pop(ctimestamp1),
+  var_samp(ctimestamp1),
+  std(ctimestamp1),
+  stddev(ctimestamp1),
+  stddev_pop(ctimestamp1),
+  stddev_samp(ctimestamp1)
+FROM alltypesorc_string
+ORDER BY c1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc_string
+#### A masked pattern was here ####
+1123143.8569999998 28798.56043589743 89.70772952793612 89.70772952793612 92.0684592523555 9.471416447814768 9.471416447814768 9.471416447814768 9.595231068210682