diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 3a045b7..7c32441 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -1712,6 +1712,9 @@ HIVE_VECTORIZATION_REDUCE_ENABLED("hive.vectorized.execution.reduce.enabled", true, "This flag should be set to true to enable vectorized mode of the reduce-side of query execution.\n" + "The default value is true."), + HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED("hive.vectorized.execution.reduce.groupby.enabled", true, + "This flag should be set to true to enable vectorized mode of the reduce-side GROUP BY query execution.\n" + + "The default value is true."), HIVE_VECTORIZATION_GROUPBY_CHECKINTERVAL("hive.vectorized.groupby.checkinterval", 100000, "Number of entries added to the group by aggregation hash before a recomputation of average entry size is performed."), HIVE_VECTORIZATION_GROUPBY_MAXENTRIES("hive.vectorized.groupby.maxentries", 1000000, diff --git itests/src/test/resources/testconfiguration.properties itests/src/test/resources/testconfiguration.properties index 536e418..fb4b7dd 100644 --- itests/src/test/resources/testconfiguration.properties +++ itests/src/test/resources/testconfiguration.properties @@ -158,6 +158,7 @@ minitez.query.files.shared=alter_merge_2_orc.q,\ vector_left_outer_join.q,\ vector_mapjoin_reduce.q,\ vector_string_concat.q,\ + vectorization_0.q,\ vectorization_12.q,\ vectorization_13.q,\ vectorization_14.q,\ diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java index ec1b0ed..bb18b32 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorExpressionDescriptor.java @@ -67,6 +67,7 @@ DATE (0x040), TIMESTAMP (0x080), DATETIME_FAMILY (DATE.value | TIMESTAMP.value), + INT_TIMESTAMP_FAMILY (INT_FAMILY.value | TIMESTAMP.value), INT_DATETIME_FAMILY (INT_FAMILY.value | DATETIME_FAMILY.value), STRING_DATETIME_FAMILY (STRING_FAMILY.value | DATETIME_FAMILY.value), ALL_FAMILY (0xFFF); diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java index de33830..34f5823 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizationContext.java @@ -1889,47 +1889,47 @@ static String getUndecoratedName(String hiveTypeName) { // TODO: And, investigate if different reduce-side versions are needed for var* and std*, or if map-side aggregate can be used.. Right now they are conservatively // marked map-side (HASH). static ArrayList aggregatesDefinition = new ArrayList() {{ - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMinLong.class)); - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class)); - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class)); - add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFMaxLong.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class)); - add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); - add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class)); - add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class)); - add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class)); - add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); - add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); - add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); - add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class)); - add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class)); - add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); - add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class)); - add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMinLong.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMinDouble.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMinString.class)); + add(new AggregateDefinition("min", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMinDecimal.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, null, VectorUDAFMaxLong.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFMaxDouble.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, null, VectorUDAFMaxString.class)); + add(new AggregateDefinition("max", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFMaxDecimal.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.NONE, GroupByDesc.Mode.HASH, VectorUDAFCountStar.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_DATETIME_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, GroupByDesc.Mode.MERGEPARTIAL, VectorUDAFCountMerge.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.STRING_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("count", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFCount.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.INT_FAMILY, null, VectorUDAFSumLong.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, null, VectorUDAFSumDouble.class)); + add(new AggregateDefinition("sum", VectorExpressionDescriptor.ArgumentType.DECIMAL, null, VectorUDAFSumDecimal.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgLong.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFAvgDouble.class)); + add(new AggregateDefinition("avg", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFAvgDecimal.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopLong.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarPopDouble.class)); + add(new AggregateDefinition("variance", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); + add(new AggregateDefinition("var_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarPopDecimal.class)); + add(new AggregateDefinition("var_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampLong.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFVarSampDouble.class)); + add(new AggregateDefinition("var_samp" , VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFVarSampDecimal.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopLong.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdPopDouble.class)); + add(new AggregateDefinition("std", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev_pop", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdPopDecimal.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.INT_TIMESTAMP_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampLong.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.FLOAT_FAMILY, GroupByDesc.Mode.HASH, VectorUDAFStdSampDouble.class)); + add(new AggregateDefinition("stddev_samp", VectorExpressionDescriptor.ArgumentType.DECIMAL, GroupByDesc.Mode.HASH, VectorUDAFStdSampDecimal.class)); }}; public VectorAggregateExpression getAggregatorExpression(AggregationDesc desc, boolean isReduce) diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java index e682dba..ee31e48 100644 --- ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java +++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/physical/Vectorizer.java @@ -791,6 +791,7 @@ public PhysicalContext resolve(PhysicalContext pctx) throws SemanticException { boolean validateMapWorkOperator(Operator op, boolean isTez) { boolean ret = false; + LOG.info("Validating MapWork operator " + op.getType().name()); switch (op.getType()) { case MAPJOIN: if (op instanceof MapJoinOperator) { @@ -827,6 +828,7 @@ boolean validateMapWorkOperator(Operator op, boolean isT boolean validateReduceWorkOperator(Operator op) { boolean ret = false; + LOG.info("Validating ReduceWork operator " + op.getType().name()); switch (op.getType()) { case EXTRACT: ret = validateExtractOperator((ExtractOperator) op); @@ -840,7 +842,12 @@ boolean validateReduceWorkOperator(Operator op) { } break; case GROUPBY: - ret = validateGroupByOperator((GroupByOperator) op, true, true); + if (HiveConf.getBoolVar(physicalContext.getConf(), + HiveConf.ConfVars.HIVE_VECTORIZATION_REDUCE_GROUPBY_ENABLED)) { + ret = validateGroupByOperator((GroupByOperator) op, true, true); + } else { + ret = false; + } break; case FILTER: ret = validateFilterOperator((FilterOperator) op); @@ -1071,11 +1078,11 @@ boolean validateExprNodeDesc(ExprNodeDesc desc, VectorExpressionDescriptor.Mode VectorizationContext vc = new ValidatorVectorizationContext(); if (vc.getVectorExpression(desc, mode) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getVectorExpression returned null"); + LOG.debug("getVectorExpression returned null"); return false; } } catch (Exception e) { - LOG.info("Failed to vectorize", e); + LOG.debug("Failed to vectorize", e); return false; } return true; @@ -1098,19 +1105,19 @@ private boolean validateAggregationDesc(AggregationDesc aggDesc, boolean isReduc if (!supportedAggregationUdfs.contains(aggDesc.getGenericUDAFName().toLowerCase())) { return false; } - if (aggDesc.getParameters() != null) { - return validateExprNodeDesc(aggDesc.getParameters()); + if (aggDesc.getParameters() != null && !validateExprNodeDesc(aggDesc.getParameters())) { + return false; } // See if we can vectorize the aggregation. try { VectorizationContext vc = new ValidatorVectorizationContext(); if (vc.getAggregatorExpression(aggDesc, isReduce) == null) { // TODO: this cannot happen - VectorizationContext throws in such cases. - LOG.info("getAggregatorExpression returned null"); + LOG.debug("getAggregatorExpression returned null"); return false; } } catch (Exception e) { - LOG.info("Failed to vectorize", e); + LOG.debug("Failed to vectorize", e); return false; } return true; diff --git ql/src/test/queries/clientpositive/vectorization_0.q ql/src/test/queries/clientpositive/vectorization_0.q index 39fba7d..b3cd794 100644 --- ql/src/test/queries/clientpositive/vectorization_0.q +++ ql/src/test/queries/clientpositive/vectorization_0.q @@ -1,4 +1,180 @@ SET hive.vectorized.execution.enabled=true; + +-- Use ORDER BY clauses to generate 2 stages. +EXPLAIN +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1; + +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1; + +SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1; + +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1; + +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1; + +SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1; + +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1; + +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1; + +SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1; + +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1; + +EXPLAIN +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))); + SELECT AVG(cbigint), (-(AVG(cbigint))), (-6432 + AVG(cbigint)), diff --git ql/src/test/queries/clientpositive/vectorized_date_funcs.q ql/src/test/queries/clientpositive/vectorized_date_funcs.q index 6392fc9..1fb0dac 100644 --- ql/src/test/queries/clientpositive/vectorized_date_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_date_funcs.q @@ -122,4 +122,20 @@ SELECT FROM date_udf_flight_orc LIMIT 10; -- Test extracting the date part of expression that includes time -SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1; \ No newline at end of file +SELECT to_date('2009-07-30 04:17:52') FROM date_udf_flight_orc LIMIT 1; + +EXPLAIN SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date), + count(*) +FROM date_udf_flight_orc +ORDER BY c1; + +SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date), + count(*) +FROM date_udf_flight_orc +ORDER BY c1; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q index 95eedd3..8a2d5aa 100644 --- ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q +++ ql/src/test/queries/clientpositive/vectorized_timestamp_funcs.q @@ -1,6 +1,7 @@ -SET hive.vectorized.execution.enabled = true; - -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC; @@ -11,6 +12,8 @@ SELECT FROM alltypesorc LIMIT 40; +SET hive.vectorized.execution.enabled = true; + CREATE TABLE alltypesorc_wrong(stimestamp1 string) STORED AS ORC; INSERT INTO TABLE alltypesorc_wrong SELECT 'abcd' FROM alltypesorc LIMIT 1; @@ -122,3 +125,48 @@ SELECT second(stimestamp1) FROM alltypesorc_wrong ORDER BY c1; + +EXPLAIN SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string; + +SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string; + +-- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... +EXPLAIN SELECT + sum(ctimestamp1) +FROM alltypesorc_string; + +SELECT + sum(ctimestamp1) +FROM alltypesorc_string; + +EXPLAIN SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string; + +SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string; \ No newline at end of file diff --git ql/src/test/results/clientpositive/tez/vectorization_0.q.out ql/src/test/results/clientpositive/tez/vectorization_0.q.out new file mode 100644 index 0000000..7703158 --- /dev/null +++ ql/src/test/results/clientpositive/tez/vectorization_0.q.out @@ -0,0 +1,1127 @@ +PREHOOK: query: -- Use ORDER BY clauses to generate 2 stages. +EXPLAIN +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Use ORDER BY clauses to generate 2 stages. +EXPLAIN +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 62 9173 12288 +PREHOOK: query: EXPLAIN +SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ctinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-39856 +PREHOOK: query: EXPLAIN +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +PREHOOK: query: EXPLAIN +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(cbigint), max(cbigint), count(cbigint), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-2147311592 2145498388 9173 12288 +PREHOOK: query: EXPLAIN +SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cbigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1698460028409 +PREHOOK: query: EXPLAIN +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 +PREHOOK: query: EXPLAIN +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(cfloat), max(cfloat), count(cfloat), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64.0 79.553 9173 12288 +PREHOOK: query: EXPLAIN +SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cfloat) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Execution mode: vectorized + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-39479.635992884636 +PREHOOK: query: EXPLAIN +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) + Reducer 3 <- Reducer 2 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reducer 3 + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: EXPLAIN +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cstring2 like '%b%') or ((79.553 <> cint) or (cbigint < cdouble))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) + outputColumnNames: cbigint, cfloat, ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (- _col0) (type: double), (-6432 + _col0) (type: double), _col1 (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) + (-6432 + _col0)) (type: double), _col2 (type: double), (- (-6432 + _col0)) (type: double), (-6432 + (- (-6432 + _col0))) (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) / (- (-6432 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-3.875652215945533E8 3.875652215945533E8 -3.875716535945533E8 1.436387455459401E9 3.875716535945533E8 0.0 2.06347151720204902E18 3.875716535945533E8 3.875652215945533E8 3.875716535945533E8 1.0 10934 -37224.52399241924 1.0517370547117279E9 -2.06347151720204902E18 1.5020929380914048E17 -64 64 diff --git ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out index b8e46e9..6d729bc 100644 --- ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/tez/vectorized_timestamp_funcs.q.out @@ -1,10 +1,16 @@ PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE @@ -169,45 +175,45 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28784 1969 12 31 31 1 23 59 44 +28785 1969 12 31 31 1 23 59 45 28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 -28786 1969 12 31 31 1 23 59 46 +28787 1969 12 31 31 1 23 59 47 +28788 1969 12 31 31 1 23 59 48 +28789 1969 12 31 31 1 23 59 49 +28789 1969 12 31 31 1 23 59 49 +28790 1969 12 31 31 1 23 59 50 +28792 1969 12 31 31 1 23 59 52 +28792 1969 12 31 31 1 23 59 52 +28792 1969 12 31 31 1 23 59 52 +28792 1969 12 31 31 1 23 59 52 +28795 1969 12 31 31 1 23 59 55 +28795 1969 12 31 31 1 23 59 55 +28795 1969 12 31 31 1 23 59 55 +28798 1969 12 31 31 1 23 59 58 +28798 1969 12 31 31 1 23 59 58 +28800 1970 1 1 1 1 0 0 0 +28800 1970 1 1 1 1 0 0 0 +28802 1970 1 1 1 1 0 0 2 +28803 1970 1 1 1 1 0 0 3 +28804 1970 1 1 1 1 0 0 4 +28804 1970 1 1 1 1 0 0 4 +28805 1970 1 1 1 1 0 0 5 +28805 1970 1 1 1 1 0 0 5 +28806 1970 1 1 1 1 0 0 6 +28807 1970 1 1 1 1 0 0 7 +28807 1970 1 1 1 1 0 0 7 +28807 1970 1 1 1 1 0 0 7 +28808 1970 1 1 1 1 0 0 8 +28808 1970 1 1 1 1 0 0 8 +28809 1970 1 1 1 1 0 0 9 +28811 1970 1 1 1 1 0 0 11 +28813 1970 1 1 1 1 0 0 13 +28814 1970 1 1 1 1 0 0 14 +28815 1970 1 1 1 1 0 0 15 PREHOOK: query: EXPLAIN SELECT to_unix_timestamp(stimestamp1) AS c1, year(stimestamp1), @@ -457,44 +463,44 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@alltypesorc_string #### A masked pattern was here #### NULL NULL NULL NULL NULL NULL NULL NULL NULL -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false false false false false true false false false -false true true true true true true true false -false true true true true true true true false -false true true true true true true true false -false false false false false true false false false -false false false false false true false false false -false true true true true true true true false +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true +true true true true true true true true true true true true true true true true true true PREHOOK: query: -- Wrong format. Should all be NULL. EXPLAIN SELECT @@ -604,3 +610,274 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: EXPLAIN SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +1969-12-31 23:59:44.088 1970-01-01 00:00:15.007 39 40 +PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... +EXPLAIN SELECT + sum(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... +EXPLAIN SELECT + sum(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ctimestamp1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + sum(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + sum(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +1123143.8569999998 +PREHOOK: query: EXPLAIN SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez + Edges: + Reducer 2 <- Map 1 (SIMPLE_EDGE) +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reducer 2 + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +2.8798560435897438E13 8.970772952794212E19 8.970772952794212E19 9.206845925236166E19 9.471416447815084E9 9.471416447815084E9 9.471416447815084E9 9.595231068211002E9 diff --git ql/src/test/results/clientpositive/vectorization_0.q.out ql/src/test/results/clientpositive/vectorization_0.q.out index 2aeaa13..d50899e 100644 --- ql/src/test/results/clientpositive/vectorization_0.q.out +++ ql/src/test/results/clientpositive/vectorization_0.q.out @@ -1,3 +1,1087 @@ +PREHOOK: query: -- Use ORDER BY clauses to generate 2 stages. +EXPLAIN +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: -- Use ORDER BY clauses to generate 2 stages. +EXPLAIN +SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctinyint), max(ctinyint), count(ctinyint), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: tinyint), _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: tinyint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(ctinyint) as c1, + MAX(ctinyint), + COUNT(ctinyint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64 62 9173 12288 +PREHOOK: query: EXPLAIN +SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ctinyint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(ctinyint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-39856 +PREHOOK: query: EXPLAIN +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint) + outputColumnNames: ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(ctinyint), variance(ctinyint), var_pop(ctinyint), var_samp(ctinyint), std(ctinyint), stddev(ctinyint), stddev_pop(ctinyint), stddev_samp(ctinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(ctinyint) as c1, + variance(ctinyint), + var_pop(ctinyint), + var_samp(ctinyint), + std(ctinyint), + stddev(ctinyint), + stddev_pop(ctinyint), + stddev_samp(ctinyint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-4.344925324321378 1158.3003004768184 1158.3003004768184 1158.4265870337827 34.033811136527426 34.033811136527426 34.033811136527426 34.03566639620536 +PREHOOK: query: EXPLAIN +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(cbigint), max(cbigint), count(cbigint), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint), _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint), _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 32 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(cbigint) as c1, + MAX(cbigint), + COUNT(cbigint), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-2147311592 2145498388 9173 12288 +PREHOOK: query: EXPLAIN +SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cbigint) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: bigint) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: bigint) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(cbigint) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1698460028409 +PREHOOK: query: EXPLAIN +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint) + outputColumnNames: cbigint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cbigint), variance(cbigint), var_pop(cbigint), var_samp(cbigint), std(cbigint), stddev(cbigint), stddev_pop(cbigint), stddev_samp(cbigint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(cbigint) as c1, + variance(cbigint), + var_pop(cbigint), + var_samp(cbigint), + std(cbigint), + stddev(cbigint), + stddev_pop(cbigint), + stddev_samp(cbigint) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-1.8515862077935246E8 2.07689300543081907E18 2.07689300543081907E18 2.07711944383088768E18 1.441142951074188E9 1.441142951074188E9 1.441142951074188E9 1.4412215110214279E9 +PREHOOK: query: EXPLAIN +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(cfloat), max(cfloat), count(cfloat), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: float), _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: float), _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: float), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 24 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT MIN(cfloat) as c1, + MAX(cfloat), + COUNT(cfloat), + COUNT(*) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-64.0 79.553 9173 12288 +PREHOOK: query: EXPLAIN +SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(cfloat) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT SUM(cfloat) as c1 +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-39479.635992884636 +PREHOOK: query: EXPLAIN +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cfloat), variance(cfloat), var_pop(cfloat), var_samp(cfloat), std(cfloat), stddev(cfloat), stddev_pop(cfloat), stddev_samp(cfloat) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: double) + sort order: + + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: double), VALUE._col0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: double), VALUE._col6 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(cfloat) as c1, + variance(cfloat), + var_pop(cfloat), + var_samp(cfloat), + std(cfloat), + stddev(cfloat), + stddev_pop(cfloat), + stddev_samp(cfloat) +FROM alltypesorc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +#### A masked pattern was here #### +-4.303895780321011 1163.8972588604984 1163.8972588604984 1164.0241556397025 34.115938487171924 34.115938487171924 34.115938487171924 34.11779822379666 +WARNING: Comparing a bigint and a double may result in a loss of precision. +PREHOOK: query: EXPLAIN +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN +SELECT AVG(cbigint), + (-(AVG(cbigint))), + (-6432 + AVG(cbigint)), + STDDEV_POP(cbigint), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) + (-6432 + AVG(cbigint))), + VAR_SAMP(cbigint), + (-((-6432 + AVG(cbigint)))), + (-6432 + (-((-6432 + AVG(cbigint))))), + (-((-6432 + AVG(cbigint)))), + ((-((-6432 + AVG(cbigint)))) / (-((-6432 + AVG(cbigint))))), + COUNT(*), + SUM(cfloat), + (VAR_SAMP(cbigint) % STDDEV_POP(cbigint)), + (-(VAR_SAMP(cbigint))), + ((-((-6432 + AVG(cbigint)))) * (-(AVG(cbigint)))), + MIN(ctinyint), + (-(MIN(ctinyint))) +FROM alltypesorc +WHERE (((cstring2 LIKE '%b%') + OR ((79.553 != cint) + OR (cbigint < cdouble))) + OR ((ctinyint >= csmallint) + AND ((cboolean2 = 1) + AND (3569 = ctinyint)))) +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: ((cstring2 like '%b%') or ((79.553 <> cint) or (cbigint < cdouble))) (type: boolean) + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cbigint (type: bigint), cfloat (type: float), ctinyint (type: tinyint) + outputColumnNames: cbigint, cfloat, ctinyint + Statistics: Num rows: 12288 Data size: 377237 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(cbigint), stddev_pop(cbigint), var_samp(cbigint), count(), sum(cfloat), min(ctinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: bigint), _col4 (type: double), _col5 (type: tinyint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), stddev_pop(VALUE._col1), var_samp(VALUE._col2), count(VALUE._col3), sum(VALUE._col4), min(VALUE._col5) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), (- _col0) (type: double), (-6432 + _col0) (type: double), _col1 (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) + (-6432 + _col0)) (type: double), _col2 (type: double), (- (-6432 + _col0)) (type: double), (-6432 + (- (-6432 + _col0))) (type: double), (- (-6432 + _col0)) (type: double), ((- (-6432 + _col0)) / (- (-6432 + _col0))) (type: double), _col3 (type: bigint), _col4 (type: double), (_col2 % _col1) (type: double), (- _col2) (type: double), ((- (-6432 + _col0)) * (- _col0)) (type: double), _col5 (type: tinyint), (- _col5) (type: tinyint) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 44 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + WARNING: Comparing a bigint and a double may result in a loss of precision. PREHOOK: query: SELECT AVG(cbigint), (-(AVG(cbigint))), diff --git ql/src/test/results/clientpositive/vectorized_date_funcs.q.out ql/src/test/results/clientpositive/vectorized_date_funcs.q.out index ef30a0c..bf4230f 100644 --- ql/src/test/results/clientpositive/vectorized_date_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_date_funcs.q.out @@ -972,3 +972,111 @@ POSTHOOK: type: QUERY POSTHOOK: Input: default@date_udf_flight_orc #### A masked pattern was here #### 2009-07-30 +PREHOOK: query: EXPLAIN SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date), + count(*) +FROM date_udf_flight_orc +ORDER BY c1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date), + count(*) +FROM date_udf_flight_orc +ORDER BY c1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: date_udf_flight_orc + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: fl_date (type: date) + outputColumnNames: fl_date + Statistics: Num rows: 137 Data size: 13152 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(fl_date), max(fl_date), count(fl_date), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: date), _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: date) + sort order: + + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: date), _col2 (type: bigint), _col3 (type: bigint) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: date), VALUE._col0 (type: date), VALUE._col1 (type: bigint), VALUE._col2 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 128 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date), + count(*) +FROM date_udf_flight_orc +ORDER BY c1 +PREHOOK: type: QUERY +PREHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +POSTHOOK: query: SELECT + min(fl_date) AS c1, + max(fl_date), + count(fl_date), + count(*) +FROM date_udf_flight_orc +ORDER BY c1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@date_udf_flight_orc +#### A masked pattern was here #### +2010-10-20 2010-10-31 137 137 diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 89ea70d..a233b87 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -1,10 +1,16 @@ PREHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@alltypesorc_string POSTHOOK: query: -- Test timestamp functions in vectorized mode to verify they run correctly end-to-end. +-- Turning on vectorization has been temporarily moved after filling the test table +-- due to bug HIVE-8197. + CREATE TABLE alltypesorc_string(ctimestamp1 timestamp, stimestamp1 string) STORED AS ORC POSTHOOK: type: CREATETABLE @@ -576,3 +582,254 @@ POSTHOOK: Input: default@alltypesorc_wrong NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL NULL +PREHOOK: query: EXPLAIN SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: timestamp), _col1 (type: timestamp), _col2 (type: bigint), _col3 (type: bigint) + outputColumnNames: _col0, _col1, _col2, _col3 + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 96 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + min(ctimestamp1), + max(ctimestamp1), + count(ctimestamp1), + count(*) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +1969-12-31 23:59:44.088 1970-01-01 00:00:15.007 39 40 +PREHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... +EXPLAIN SELECT + sum(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: -- SUM of timestamps are not vectorized reduce-side because they produce a double instead of a long (HIVE-8211)... +EXPLAIN SELECT + sum(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: sum(ctimestamp1) + mode: hash + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: double) + Reduce Operator Tree: + Group By Operator + aggregations: sum(VALUE._col0) + mode: mergepartial + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double) + outputColumnNames: _col0 + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + sum(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + sum(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +1123143.8569999998 +PREHOOK: query: EXPLAIN SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: alltypesorc_string + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctimestamp1 (type: timestamp) + outputColumnNames: ctimestamp1 + Statistics: Num rows: 40 Data size: 5694 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: avg(ctimestamp1), variance(ctimestamp1), var_pop(ctimestamp1), var_samp(ctimestamp1), std(ctimestamp1), stddev(ctimestamp1), stddev_pop(ctimestamp1), stddev_samp(ctimestamp1) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + Reduce Output Operator + sort order: + Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col7 (type: struct) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: avg(VALUE._col0), variance(VALUE._col1), var_pop(VALUE._col2), var_samp(VALUE._col3), std(VALUE._col4), stddev(VALUE._col5), stddev_pop(VALUE._col6), stddev_samp(VALUE._col7) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: double), _col1 (type: double), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +POSTHOOK: query: SELECT + avg(ctimestamp1), + variance(ctimestamp1), + var_pop(ctimestamp1), + var_samp(ctimestamp1), + std(ctimestamp1), + stddev(ctimestamp1), + stddev_pop(ctimestamp1), + stddev_samp(ctimestamp1) +FROM alltypesorc_string +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc_string +#### A masked pattern was here #### +2.8798560435897438E13 8.970772952794212E19 8.970772952794212E19 9.206845925236166E19 9.471416447815084E9 9.471416447815084E9 9.471416447815084E9 9.595231068211002E9