diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3a045b7..7c32441 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1712,6 +1712,9 @@ HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true, "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" + "The default value is true."), + HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled", true, + "This flag should be set to true to enable vectorized mode of the reduce-side GROUP BY query execution.\n" + + "The default value is true."), HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000, "Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000, diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index ec1b0ed..bb18b32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -67,6 +67,7 @@ DATE (0x040), TIMESTAMP (0x080), DATETIME_FAMILY (DATE.value | TIMESTAMP.value), + INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value), INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value), STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value), ALL_FAMILY (0xFFF); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index de33830..f3f0fe6 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1889,47 +1889,47 @@ static String getUndecoratedName(String hiveTypeName) { // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively // marked map-side (HASH). static ArrayList aggregatesDefinition = new ArrayList() {{ - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMinLong.class)); - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class)); - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class)); - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMaxLong.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class)); - add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class)); - add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); - add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class)); - add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class)); - add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMinLong.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMaxLong.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); + add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); }}; public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce) diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e682dba..ee31e48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -791,6 +791,7 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { boolean validateMapWorkOperator(Operator op, boolean isTez) { boolean ret = false; + LOG.info("Validating MapWork operator " + op.getType().name()); switch (op.getType()) { case MAPJOIN: if (op instanceof MapJoinOperator) { @@ -827,6 +828,7 @@ boolean validateMapWorkOperator(Operator op, boolean isT boolean validateReduceWorkOperator(Operator op) { boolean ret = false; + LOG.info("Validating ReduceWork operator " + op.getType().name()); switch (op.getType()) { case EXTRACT: ret = validateExtractOperator((ExtractOperator) op); @@ -840,7 +842,12 @@ boolean validateReduceWorkOperator(Operator op) { } break; case GROUPBY: - ret = validateGroupByOperator((GroupByOperator) op, true, true); + if (HiveConf.getBoolVar(physicalContext.getConf(), + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) { + ret = validateGroupByOperator((GroupByOperator) op, true, true); + } else { + ret = false; + } break; case FILTER: ret = validateFilterOperator((FilterOperator) op); @@ -1071,11 +1078,11 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode VectorizationContext vc = new ValidatorVectorizationContext(); if (vc.getVectorExpression(desc, mode) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getVectorExpression returned null"); + LOG.debug("getVectorExpression returned null"); return false; } } catch (Exception e) { - LOG.info("Failed to vectorize", e); + LOG.debug("Failed to vectorize", e); return false; } return true; @@ -1098,19 +1105,19 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) { return false; } - if (aggDesc.getParameters() != null) { - return validateExprNodeDesc(aggDesc.getParameters()); + if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { + return false; } // See if we can vectorize the aggregation. try { VectorizationContext vc = new ValidatorVectorizationContext(); if (vc.getAggregatorExpression(aggDesc, isReduce) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getAggregatorExpression returned null"); + LOG.debug("getAggregatorExpression returned null"); return false; } } catch (Exception e) { - LOG.info("Failed to vectorize", e); + LOG.debug("Failed to vectorize", e); return false; } return true; diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q index 6392fc9..99ed172 100644 --- ql/src/test/queries/clientpositive/vectorized_date_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q @@ -122,4 +122,18 @@ SELECT FROM date_udf_flight_orc LIMIT 10; -- Test extracting the date part of expression that includes time -SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1; \ No newline at end of file +SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1; + +EXPLAIN SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date) +FROM date_udf_flight_orc +ORDER BY c1; + +SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date) +FROM date_udf_flight_orc +ORDER BY c1; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index 95eedd3..98bd726 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,6 +1,7 @@ -SET hive.vectorized.execution.enabled = true; - -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC; @@ -11,6 +12,8 @@ SELECT FROM alltypesorc LIMIT 40; +SET hive.vectorized.execution.enabled = true; + CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC; INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; @@ -122,3 +125,17 @@ SELECT second(stimestamp1) FROM alltypesorc_wrong ORDER BY c1; + +EXPLAIN SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1; + +SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out index b8e46e9..971f114 100644 --- ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out @@ -1,10 +1,16 @@ PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE @@ -169,45 +175,45 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28785 1969 12 31 31 1 23 59 45 28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 +28787 1969 12 31 31 1 23 59 47 +28788 1969 12 31 31 1 23 59 48 +28789 1969 12 31 31 1 23 59 49 +28789 1969 12 31 31 1 23 59 49 +28790 1969 12 31 31 1 23 59 50 +28792 1969 12 31 31 1 23 59 52 +28792 1969 12 31 31 1 23 59 52 +28792 1969 12 31 31 1 23 59 52 +28792 1969 12 31 31 1 23 59 52 +28795 1969 12 31 31 1 23 59 55 +28795 1969 12 31 31 1 23 59 55 +28795 1969 12 31 31 1 23 59 55 +28798 1969 12 31 31 1 23 59 58 +28798 1969 12 31 31 1 23 59 58 +28800 1970 1 1 1 1 0 0 0 +28800 1970 1 1 1 1 0 0 0 +28802 1970 1 1 1 1 0 0 2 +28803 1970 1 1 1 1 0 0 3 +28804 1970 1 1 1 1 0 0 4 +28804 1970 1 1 1 1 0 0 4 +28805 1970 1 1 1 1 0 0 5 +28805 1970 1 1 1 1 0 0 5 +28806 1970 1 1 1 1 0 0 6 +28807 1970 1 1 1 1 0 0 7 +28807 1970 1 1 1 1 0 0 7 +28807 1970 1 1 1 1 0 0 7 +28808 1970 1 1 1 1 0 0 8 +28808 1970 1 1 1 1 0 0 8 +28809 1970 1 1 1 1 0 0 9 +28811 1970 1 1 1 1 0 0 11 +28813 1970 1 1 1 1 0 0 13 +28814 1970 1 1 1 1 0 0 14 +28815 1970 1 1 1 1 0 0 15 PREHOOK: query: EXPLAIN SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -457,44 +463,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true true true true true true true true true true PREHOOK: query: -- Wrong format. Should all be NULL. EXPLAIN SELECT @@ -604,3 +610,105 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: EXPLAIN SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp), _col2 (type: bigint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +1969-12-31 23:59:44.088 1970-01-01 00:00:15.007 39 diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index ef30a0c..b690668 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -972,3 +972,107 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### 2009-07-30 +PREHOOK: query: EXPLAIN SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date) +FROM date_udf_flight_orc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date) +FROM date_udf_flight_orc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: fl_date (type: date) + outputColumnNames: fl_date + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(fl_date), max(fl_date), count(fl_date) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 120 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date) +FROM date_udf_flight_orc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date) +FROM date_udf_flight_orc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +2010-10-20 2010-10-31 137 diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 89ea70d..ce60c8d 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -1,10 +1,16 @@ PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE @@ -576,3 +582,107 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: EXPLAIN SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1) + mode: hash + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: timestamp) + sort order: + + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: timestamp), _col2 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: timestamp), VALUE._col1 (type: bigint) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 88 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + min(ctimestamp1) AS c1, + max(ctimestamp1), + count(ctimestamp1) +FROM alltypesorc_string +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +1969-12-31 23:59:44.088 1970-01-01 00:00:15.007 39