diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 0eee582..33a1c8b 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2501,6 +2501,9 @@ "higher compute cost. (NDV means the number of distinct values.). It only affects the FM-Sketch \n" + "(not the HLL algorithm which is the default), where it computes the number of necessary\n" + " bitvectors to achieve the accuracy."), + HIVE_STATS_USE_UDF_ESTIMATORS("hive.stats.use.statestimators", true, + "StatEstimators can provide more accurate column statistics for UDF results."), + /** * @deprecated Use MetastoreConf.STATS_NDV_TUNER */ diff --git contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out index 15dd4c0..509b016 100644 --- contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out +++ contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out @@ -39,13 +39,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 526000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 501250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 526000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 501250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator @@ -53,10 +53,10 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
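The new flag is on by default. For callers that need the old behavior, a minimal sketch of toggling it through the ConfVars entry added above (the helper class here is hypothetical, not part of the patch):

  import org.apache.hadoop.hive.conf.HiveConf;
  import org.apache.hadoop.hive.conf.HiveConf.ConfVars;

  class EstimatorToggle {
    // revert to the pre-patch NDV heuristics for UDF results
    static void disable(HiveConf conf) {
      conf.setBoolVar(ConfVars.HIVE_STATS_USE_UDF_ESTIMATORS, false);
    }
  }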
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index cb2d0a7..2673580 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -73,6 +74,9 @@ import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.Statistics.State; import org.apache.hadoop.hive.ql.stats.BasicStats.Factory; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.NDV; @@ -81,6 +85,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; @@ -1528,18 +1533,7 @@ return null; } } else if (end instanceof ExprNodeConstantDesc) { - - // constant projection - ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; - - colName = encd.getName(); - colType = encd.getTypeString(); - if (encd.getValue() == null) { - // null projection - numNulls = numRows; - } else { - countDistincts = 1; - } + return buildColStatForConstant(conf, numRows, (ExprNodeConstantDesc) end); } else if (end instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; colName = engfd.getName(); @@ -1560,6 +1554,27 @@ } } + if (conf.getBoolVar(ConfVars.HIVE_STATS_USE_UDF_ESTIMATORS)) { + Optional<IStatEstimatorProvider> sep = engfd.getGenericUDF().adapt(IStatEstimatorProvider.class); + if (sep.isPresent()) { + Optional<IStatEstimator> se = sep.get().getStatEstimator(); + if (se.isPresent()) { + List<ColStatistics> csList = new ArrayList<>(); + for (ExprNodeDesc child : engfd.getChildren()) { + ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, child); + if (cs == null) { + break; + } + csList.add(cs); + } + // only estimate when statistics are available for every argument + if (csList.size() == engfd.getChildren().size()) { + Optional<ColStatistics> res = se.get().estimate(csList); + if (res.isPresent()) { + ColStatistics newStats = res.get(); + colType = colType.toLowerCase(); + newStats.setColumnType(colType); + newStats.setColumnName(colName); + return newStats; + } + } + } + } + } // fallback to default countDistincts = getNDVFor(engfd, numRows, parentStats); } else if (end instanceof ExprNodeColumnListDesc) { @@ -1590,6 +1605,43 @@ return colStats; } + private static ColStatistics buildColStatForConstant(HiveConf conf, long numRows, ExprNodeConstantDesc encd) { + long numNulls = 0; + long countDistincts = 0; + if (encd.getValue() == null) { + // null projection + numNulls = numRows; + } else { + countDistincts = 1; + } + String colType = encd.getTypeString(); + colType = colType.toLowerCase(); + ObjectInspector oi = encd.getWritableObjectInspector(); + double avgColSize = getAvgColLenOf(conf, oi, colType); + ColStatistics colStats = new ColStatistics(encd.getName(), colType); + colStats.setAvgColLen(avgColSize); + colStats.setCountDistint(countDistincts); + colStats.setNumNulls(numNulls); + + Optional<Long> value = getLongConstValue(encd); + if (value.isPresent()) { + colStats.setRange(value.get(), value.get()); + } + return colStats; + } + + private static Optional<Long> getLongConstValue(ExprNodeConstantDesc encd) { + if (encd.getValue() != null) { + String constant = encd.getValue().toString(); + PrimitiveCategory category = GenericUDAFSum.getReturnType(encd.getTypeInfo()); + if (category == PrimitiveCategory.LONG) { + return Optional.of(Long.parseLong(constant)); + } + } + return Optional.empty(); + } + private static boolean isWideningCast(ExprNodeGenericFuncDesc engfd) { GenericUDF udf = engfd.getGenericUDF(); if (!FunctionRegistry.isOpCast(udf)) {
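Condensed sketch of the dispatch the StatsUtils hunk above implements: the UDF is probed through adapt(), and only a present estimate overrides the getNDVFor fallback. The names match the patch; the wrapper method itself is illustrative only:

  import java.util.List;
  import java.util.Optional;

  import org.apache.hadoop.hive.ql.plan.ColStatistics;
  import org.apache.hadoop.hive.ql.plan.ExprNodeGenericFuncDesc;
  import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider;

  class EstimatorDispatchSketch {
    static Optional<ColStatistics> tryEstimate(ExprNodeGenericFuncDesc engfd, List<ColStatistics> argStats) {
      return engfd.getGenericUDF()
          .adapt(IStatEstimatorProvider.class)               // does the UDF opt in?
          .flatMap(IStatEstimatorProvider::getStatEstimator) // the provider may still decline
          .flatMap(estimator -> estimator.estimate(argStats));
    }
  }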
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimator.java ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimator.java new file mode 100644 index 0000000..a72ecd2 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimator.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.List; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +/** + * Enables statistics-related computation on UDFs. + */ +public interface IStatEstimator { + + /** + * Estimates the output column statistics of the UDF. + * + * @param argStats the statistics for every argument of the UDF. + */ + public Optional<ColStatistics> estimate(List<ColStatistics> argStats); +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimatorProvider.java ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimatorProvider.java new file mode 100644 index 0000000..be9a934 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimatorProvider.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.Optional; + +/** + * Interface for UDFs to communicate that they can supply a StatEstimator for their results. + */ +public interface IStatEstimatorProvider { + + /** + * Returns the stat estimator for the given UDF instance. + */ + public Optional<IStatEstimator> getStatEstimator(); +}
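To make the contract concrete, a hypothetical UDF-side implementation (not part of this patch, assumed to live in the same estimator package): a length-preserving function such as upper() could simply echo the statistics of its single argument, while returning Optional.empty() declines and keeps the default getNDVFor path:

  import java.util.List;
  import java.util.Optional;

  import org.apache.hadoop.hive.ql.plan.ColStatistics;

  public class PassThroughStatEstimator implements IStatEstimator {

    @Override
    public Optional<ColStatistics> estimate(List<ColStatistics> argStats) {
      ColStatistics cs = argStats.get(0);
      if (cs == null) {
        // no statistics for the argument; decline instead of guessing
        return Optional.empty();
      }
      ColStatistics result = cs.clone();  // the output column mirrors the input column
      result.setIsEstimated(true);
      return Optional.of(result);
    }
  }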
diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimators.java ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimators.java new file mode 100644 index 0000000..05d894a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimators.java @@ -0,0 +1,51 @@ +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +/** + * Utility classes for implementing StatEstimators. + */ +public class StatEstimators { + + /** + * Combines column statistics by keeping the most pessimistic value of every field. + */ + public static class WorstStatCombiner { + + private boolean inited; + private ColStatistics result; + + public void add(ColStatistics stat) { + if (!inited) { + inited = true; + result = stat.clone(); + result.setRange(null); + result.setIsEstimated(true); + return; + } + if (stat.getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(stat.getAvgColLen()); + } + if (stat.getCountDistint() > result.getCountDistint()) { + result.setCountDistint(stat.getCountDistint()); + } + if (stat.getNumNulls() > result.getNumNulls()) { + result.setNumNulls(stat.getNumNulls()); + } + if (stat.getNumTrues() > result.getNumTrues()) { + result.setNumTrues(stat.getNumTrues()); + } + if (stat.getNumFalses() > result.getNumFalses()) { + result.setNumFalses(stat.getNumFalses()); + } + if (stat.isFilteredColumn()) { + result.setFilterColumn(); + } + } + + public Optional<ColStatistics> getResult() { + // empty when no statistics have been added + return Optional.ofNullable(result); + } + } +}
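A short usage sketch of the combiner above (column names and values are made up): each field of the result takes the most pessimistic value seen, and the range is cleared because ranges cannot be merged meaningfully:

  ColStatistics a = new ColStatistics("c1", "string");
  a.setAvgColLen(4.0);
  a.setCountDistint(10);   // Hive's setter really is spelled this way
  a.setNumNulls(0);

  ColStatistics b = new ColStatistics("c2", "string");
  b.setAvgColLen(16.0);
  b.setCountDistint(2);
  b.setNumNulls(5);

  StatEstimators.WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner();
  combiner.add(a);
  combiner.add(b);
  // combined: avgColLen=16.0, countDistinct=10, numNulls=5, range=null
  Optional<ColStatistics> worst = combiner.getResult();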
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java index 5b1964c..ac974af 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java @@ -19,12 +19,18 @@ package org.apache.hadoop.hive.ql.udf; import java.util.Arrays; +import java.util.List; +import java.util.Optional; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStart; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStartLen; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -48,7 +54,7 @@ + " > SELECT _FUNC_('Facebook', 5, 1) FROM src LIMIT 1;\n" + " 'b'") @VectorizedExpressions({StringSubstrColStart.class, StringSubstrColStartLen.class}) -public class UDFSubstr extends UDF { +public class UDFSubstr extends UDF implements IStatEstimatorProvider { private final int[] index; private final Text r; @@ -131,4 +137,52 @@ public BytesWritable evaluate(BytesWritable bw, IntWritable pos){ return evaluate(bw, pos, maxValue); } + + @Override + public Optional<IStatEstimator> getStatEstimator() { + return Optional.of(new SubStrStatEstimator()); + } + + private static class SubStrStatEstimator implements IStatEstimator { + + @Override + public Optional<ColStatistics> estimate(List<ColStatistics> csList) { + ColStatistics cs = csList.get(0).clone(); + + // this might be bad in a skewed case; consider: + // 1 row with a 1000 character string + // 99 rows with length 0 + // the original average is ~10 and substr(5) would estimate ~5, + // but in reality the average stays ~10 + Range startRange = csList.get(1).getRange(); + if (startRange != null && startRange.minValue != null) { + double newAvgColLen = cs.getAvgColLen() - startRange.minValue.doubleValue(); + if (newAvgColLen > 0) { + cs.setAvgColLen(newAvgColLen); + } + } + + if (csList.size() > 2) { + Range lengthRange = csList.get(2).getRange(); + if (lengthRange != null && lengthRange.maxValue != null) { + double w = lengthRange.maxValue.doubleValue(); + if (cs.getAvgColLen() > w) { + cs.setAvgColLen(w); + } + } + } + + return Optional.of(cs); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index 6597f4b..c1bf325 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -20,6 +20,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Optional; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; @@ -638,4 +639,12 @@ return i + ORDINAL_SUFFIXES[i % 10]; } } + + @SuppressWarnings("unchecked") + public <T> Optional<T> adapt(Class<T> clazz) { + if (clazz.isInstance(this)) { + return Optional.of((T) this); + } + return Optional.empty(); + } }
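The adapt() hook above is a typed self-probe; GenericUDFBridge (next file) overrides it to unwrap the legacy UDF it delegates to. A small usage sketch:

  GenericUDF udf = new GenericUDFCoalesce();  // implements IStatEstimatorProvider (see below)
  Optional<IStatEstimatorProvider> provider = udf.adapt(IStatEstimatorProvider.class);
  provider.flatMap(IStatEstimatorProvider::getStatEstimator)
      .ifPresent(estimator -> System.out.println("estimator available: " + estimator));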
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java index 7a644fc..377ab37 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.lang.reflect.Method; import java.util.ArrayList; +import java.util.Optional; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -28,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -44,7 +47,7 @@ * UDF class needs to be serialized with the plan. * */ -public class GenericUDFBridge extends GenericUDF implements Serializable { +public class GenericUDFBridge extends GenericUDF implements Serializable, IStatEstimatorProvider { private static final long serialVersionUID = 4994861742809511113L; /** @@ -97,7 +100,7 @@ this.isOperator = isOperator; this.udfClassName = udfClassName; } - + // For Java serialization only public GenericUDFBridge() { } @@ -151,7 +154,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { try { - udf = (UDF)getUdfClassInternal().newInstance(); + udf = getUdfClassInternal().newInstance(); } catch (Exception e) { throw new UDFArgumentException( "Unable to instantiate UDF implementation class " + udfClassName + ": " + e); @@ -249,4 +252,22 @@ public interface UdfWhitelistChecker { boolean isUdfAllowed(Class<?> clazz); } + + @SuppressWarnings("unchecked") + @Override + public <T> Optional<T> adapt(Class<T> clazz) { + if (clazz.isInstance(udf)) { + return Optional.of((T) udf); + } + return super.adapt(clazz); + } + + @Override + public Optional<IStatEstimator> getStatEstimator() { + if (udf instanceof IStatEstimatorProvider) { + return ((IStatEstimatorProvider) udf).getStatEstimator(); + } + return Optional.empty(); + } }
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java index 06e9d00..a5fec3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java @@ -18,16 +18,24 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.List; +import java.util.Optional; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators.WorstStatCombiner; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; /** * GenericUDF Class for SQL construct "CASE a WHEN b THEN c [ELSE f] END". - + * NOTES: 1. a and b should be compatible, or an exception will be * thrown. 2. c and f should be compatible types, or an exception will be * thrown. @@ -49,7 +57,7 @@ + " END\n" + " FROM emp_details") -public class GenericUDFCase extends GenericUDF { +public class GenericUDFCase extends GenericUDF implements IStatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; private transient GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver; @@ -138,4 +146,23 @@ return sb.toString(); } + @Override + public Optional<IStatEstimator> getStatEstimator() { + return Optional.of(new CaseStatEstimator()); + } + + static class CaseStatEstimator implements IStatEstimator { + + @Override + public Optional<ColStatistics> estimate(List<ColStatistics> argStats) { + WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner(); + // the THEN results sit at even indices; the children are a, b1, c1, b2, c2, ... + for (int i = 2; i < argStats.size(); i += 2) { + combiner.add(argStats.get(i)); + } + // the last child is the ELSE branch when present; with no ELSE this + // re-adds the last THEN result, which is harmless for a worst-of combiner + combiner.add(argStats.get(argStats.size() - 1)); + return combiner.getResult(); + } + } }
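A worked example of the index arithmetic in CaseStatEstimator, with hypothetical per-argument statistics (aStats, b1Stats, and so on):

  // stats arrive in child order for: CASE a WHEN b1 THEN c1 WHEN b2 THEN c2 ELSE f END
  List<ColStatistics> argStats = Arrays.asList(aStats, b1Stats, c1Stats, b2Stats, c2Stats, fStats);
  // the loop visits indices 2 and 4 (the THEN results c1 and c2), and the final
  // add picks up index 5, the ELSE branch; without an ELSE the list has odd size
  // and the last THEN result is merely added twice, a no-op for the combiner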
diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index 8ebe9e0..26b895c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -18,9 +18,17 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.List; +import java.util.Optional; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators.WorstStatCombiner; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; /** @@ -33,7 +41,7 @@ value = "_FUNC_(a1, a2, ...) - Returns the first non-null argument", extended = "Example:\n" + " > SELECT _FUNC_(NULL, 1, NULL) FROM src LIMIT 1;\n" + " 1") -public class GenericUDFCoalesce extends GenericUDF { +public class GenericUDFCoalesce extends GenericUDF implements IStatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; @@ -72,4 +80,20 @@ return getStandardDisplayString("COALESCE", children, ","); } + @Override + public Optional<IStatEstimator> getStatEstimator() { + return Optional.of(new CoalesceStatEstimator()); + } + + static class CoalesceStatEstimator implements IStatEstimator { + + @Override + public Optional<ColStatistics> estimate(List<ColStatistics> argStats) { + WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner(); + // any argument may surface in the output, so combine all of them + for (ColStatistics stat : argStats) { + combiner.add(stat); + } + return combiner.getResult(); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 23708dc..aa5faab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.List; +import java.util.Optional; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; @@ -25,6 +28,11 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators.WorstStatCombiner; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -115,7 +123,7 @@ IfExprTimestampScalarColumn.class, IfExprTimestampScalarScalar.class, }) @VectorizedExpressionsSupportDecimal64() -public class GenericUDFIf extends GenericUDF { +public class GenericUDFIf extends GenericUDF implements IStatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; @@ -171,4 +179,22 @@ assert (children.length == 3); return getStandardDisplayString("if", children); } + + @Override + public Optional<IStatEstimator> getStatEstimator() { + return Optional.of(new IfStatEstimator()); + } + + static class IfStatEstimator implements IStatEstimator { + + @Override + public Optional<ColStatistics> estimate(List<ColStatistics> argStats) { + WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner(); + // combine the THEN (index 1) and ELSE (index 2) branches + combiner.add(argStats.get(1)); + combiner.add(argStats.get(2)); + return combiner.getResult(); + } + } } diff --git ql/src/test/queries/clientpositive/udf_coalesce.q ql/src/test/queries/clientpositive/udf_coalesce.q index 7d87580..6c6594a 100644 --- ql/src/test/queries/clientpositive/udf_coalesce.q +++ ql/src/test/queries/clientpositive/udf_coalesce.q @@ -1,6 +1,7 @@ --! qt:dataset:src_thrift --! 
qt:dataset:src -set hive.fetch.task.conversion=more; +set hive.cbo.enable=false; +set hive.fetch.task.conversion=none; DESCRIBE FUNCTION coalesce; DESCRIBE FUNCTION EXTENDED coalesce; @@ -47,6 +48,10 @@ FROM src tablesample (1 rows); EXPLAIN +SELECT COALESCE(key,'x') from src limit 1; + + +EXPLAIN SELECT COALESCE(src_thrift.lint[1], 999), COALESCE(src_thrift.lintstring[0].mystring, '999'), COALESCE(src_thrift.mstringstring['key_2'], '999') diff --git ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out index 8579c83..c813456 100644 --- ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/cbo_rp_gby2_map_multi_distinct.q.out @@ -38,20 +38,20 @@ Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) outputColumnNames: $f0, $f1, $f2 - Statistics: Num rows: 500 Data size: 229500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT $f1), sum($f1), sum(DISTINCT $f1), count($f2) keys: $f0 (type: string), $f1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator @@ -59,14 +59,14 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: $f0, $f1, $f2, $f3, $f4 - Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -75,7 +75,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 
'hll'), compute_stats(c4, 'hll') minReductionHashAggr: 0.99 @@ -204,20 +204,20 @@ Select Operator expressions: substr(key, 1, 1) (type: string), substr(value, 5) (type: string), value (type: string) outputColumnNames: $f0, $f1, $f2 - Statistics: Num rows: 500 Data size: 229500 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 131000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT $f0), sum($f1), sum(DISTINCT $f1), count($f2) keys: $f0 (type: string), $f1 (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator @@ -225,14 +225,14 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: $f0, $f1, $f2, $f3, $f4 - Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: $f0 (type: string), UDFToInteger($f1) (type: int), concat($f0, $f2) (type: string), UDFToInteger($f3) (type: int), UDFToInteger($f4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -241,7 +241,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out index 9ae6330..c1e3013 100644 --- ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/cbo_rp_groupby3_noskew_multi_distinct.q.out @@ -55,12 +55,12 @@ Select Operator expressions: substr(value, 5) (type: string), UDFToDouble(substr(value, 5)) (type: double), (UDFToDouble(substr(value, 5)) * UDFToDouble(substr(value, 5))) (type: double) outputColumnNames: $f0, $f00, $f2 - Statistics: Num rows: 500 Data size: 100000 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: $f0 (type: string) null sort order: z sort order: + - Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: $f2 (type: double), $f00 (type: double) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/constprog_when_case.q.out ql/src/test/results/clientpositive/constprog_when_case.q.out index f75fa1c..8d3dd98 100644 --- ql/src/test/results/clientpositive/constprog_when_case.q.out +++ ql/src/test/results/clientpositive/constprog_when_case.q.out @@ -52,10 +52,10 @@ Select Operator expressions: if((bool0 is true or (null and bool0 is not true and bool0 is not false)), key0, if((((not bool0) is true and bool0 is not true) or (null and bool0 is not true and bool0 is not false)), key1, key2)) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/count_dist_rewrite.q.out ql/src/test/results/clientpositive/count_dist_rewrite.q.out index f8dbec1..ee1cd28 100644 --- ql/src/test/results/clientpositive/count_dist_rewrite.q.out +++ ql/src/test/results/clientpositive/count_dist_rewrite.q.out @@ -762,13 +762,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7 - Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double) Reduce Operator Tree: Group By Operator @@ -776,7 +776,7 @@ keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6) mode: partial2 diff --git ql/src/test/results/clientpositive/groupby11.q.out ql/src/test/results/clientpositive/groupby11.q.out index a784779..4b62238 100644 --- ql/src/test/results/clientpositive/groupby11.q.out +++ ql/src/test/results/clientpositive/groupby11.q.out @@ -234,7 +234,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1, 
_col2 - Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -251,7 +251,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 51000 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: bigint) Execution mode: vectorized Reduce Operator Tree: @@ -260,14 +260,14 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 307 Data size: 61400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 31314 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 307 Data size: 58944 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 307 Data size: 58944 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 28858 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -276,7 +276,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: key, val1, val2 - Statistics: Num rows: 307 Data size: 85653 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -305,7 +305,7 @@ null sort order: z sort order: + Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 307 Data size: 85653 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 55567 Basic stats: COMPLETE Column stats: COMPLETE value expressions: key (type: string), val1 (type: int), val2 (type: int) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby2_map.q.out ql/src/test/results/clientpositive/groupby2_map.q.out index 1569f04..85bebf0 100644 --- ql/src/test/results/clientpositive/groupby2_map.q.out +++ ql/src/test/results/clientpositive/groupby2_map.q.out @@ -41,13 +41,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Reduce Operator Tree: Group By Operator @@ -55,14 +55,14 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -71,7 +71,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) outputColumnNames: key, c1, c2 - Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out index 8d1b345..d49e7de 100644 --- ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_map_multi_distinct.q.out @@ -41,13 +41,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator @@ -55,14 +55,14 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -71,7 +71,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) 
outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') minReductionHashAggr: 0.99 @@ -195,13 +195,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 50750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double), _col5 (type: bigint) Reduce Operator Tree: Group By Operator @@ -209,14 +209,14 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 54000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 29250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -225,7 +225,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 250 Data size: 95000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 70250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/groupby2_map_skew.q.out ql/src/test/results/clientpositive/groupby2_map_skew.q.out index 762e286..9783f9e 100644 --- ql/src/test/results/clientpositive/groupby2_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby2_map_skew.q.out @@ -42,13 +42,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num 
rows: 250 Data size: 96000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 46750 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: double) Reduce Operator Tree: Group By Operator @@ -56,7 +56,7 @@ keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -73,7 +73,7 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: double) Execution mode: vectorized Reduce Operator Tree: @@ -82,14 +82,14 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 50000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 25250 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -98,7 +98,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) outputColumnNames: key, c1, c2 - Statistics: Num rows: 250 Data size: 93000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/groupby2_noskew.q.out ql/src/test/results/clientpositive/groupby2_noskew.q.out index 6edcdc6..5dd501f 100644 --- ql/src/test/results/clientpositive/groupby2_noskew.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew.q.out @@ -48,14 +48,14 @@ keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -64,7 +64,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) outputColumnNames: key, c1, c2 - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -97,7 +97,7 @@ Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE value expressions: key (type: string), c1 (type: int), c2 (type: string) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out index ded98a4..4d8a5aa 100644 --- ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out +++ ql/src/test/results/clientpositive/groupby2_noskew_multi_distinct.q.out @@ -49,14 +49,14 @@ keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -65,7 +65,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int) outputColumnNames: key, c1, c2, c3, c4 - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -98,7 +98,7 @@ Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby3_map.q.out ql/src/test/results/clientpositive/groupby3_map.q.out index af02802..22573cb 100644 --- ql/src/test/results/clientpositive/groupby3_map.q.out +++ ql/src/test/results/clientpositive/groupby3_map.q.out @@ -58,12 +58,12 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 - Statistics: Num rows: 250 Data size: 150000 Basic stats: 
COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 null sort order: z
 sort order: +
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double)
 Reduce Operator Tree:
 Group By Operator
diff --git ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
index 1f56188..fbd6e4e 100644
--- ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
+++ ql/src/test/results/clientpositive/groupby3_map_multi_distinct.q.out
@@ -62,12 +62,12 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 null sort order: z
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 150000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 125500 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: double), _col2 (type: bigint), _col5 (type: string), _col6 (type: string), _col7 (type: double), _col8 (type: double)
 Reduce Operator Tree:
 Group By Operator
diff --git ql/src/test/results/clientpositive/groupby3_map_skew.q.out ql/src/test/results/clientpositive/groupby3_map_skew.q.out
index f315663..c6cccd1 100644
--- ql/src/test/results/clientpositive/groupby3_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby3_map_skew.q.out
@@ -59,13 +59,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 250 Data size: 348000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 null sort order: z
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 348000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 323500 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: _col1 (type: double), _col2 (type: struct), _col4 (type: string), _col5 (type: string), _col6 (type: struct), _col7 (type: struct), _col8 (type: struct), _col9 (type: struct)
 Reduce Operator Tree:
 Group By Operator
diff --git ql/src/test/results/clientpositive/groupby4.q.out ql/src/test/results/clientpositive/groupby4.q.out
index 7528034..d861e47 100644
--- ql/src/test/results/clientpositive/groupby4.q.out
+++ ql/src/test/results/clientpositive/groupby4.q.out
@@ -49,7 +49,7 @@ keys: KEY._col0 (type: string)
 mode: partial1
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -66,17 +66,17 @@ null sort order: z
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string)
 mode: final
 outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -85,7 +85,7 @@ Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: c1
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -119,7 +119,7 @@ null sort order:
 sort order:
 Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: c1 (type: string)
 Execution mode: vectorized
 Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/groupby4_noskew.q.out ql/src/test/results/clientpositive/groupby4_noskew.q.out
index 6550017..842bda3 100644
--- ql/src/test/results/clientpositive/groupby4_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby4_noskew.q.out
@@ -47,10 +47,10 @@ keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -59,7 +59,7 @@ Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: c1
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -92,7 +92,7 @@ Reduce Output Operator
 null sort order:
 sort order:
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: c1 (type: string)
 Execution mode: vectorized
 Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/groupby6.q.out ql/src/test/results/clientpositive/groupby6.q.out
index be673c5..a72afb2 100644
--- ql/src/test/results/clientpositive/groupby6.q.out
+++ ql/src/test/results/clientpositive/groupby6.q.out
@@ -49,7 +49,7 @@ keys: KEY._col0 (type: string)
 mode: partial1
 outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -66,17 +66,17 @@ null sort order: z
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string)
 mode: final
 outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -85,7 +85,7 @@ Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -119,7 +119,7 @@ null sort order:
 sort order:
 Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: c1 (type: string)
 Execution mode: vectorized
 Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/groupby6_map.q.out ql/src/test/results/clientpositive/groupby6_map.q.out
index ed812ce..9f8c791 100644
--- ql/src/test/results/clientpositive/groupby6_map.q.out
+++ ql/src/test/results/clientpositive/groupby6_map.q.out
@@ -40,23 +40,23 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 null sort order: z
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -65,7 +65,7 @@ Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: c1
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(c1, 'hll')
 minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/groupby6_map_skew.q.out ql/src/test/results/clientpositive/groupby6_map_skew.q.out
index 57f8c3f..43fbc84 100644
--- ql/src/test/results/clientpositive/groupby6_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby6_map_skew.q.out
@@ -41,20 +41,20 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string)
 null sort order: z
 sort order: +
 Map-reduce partition columns: rand() (type: double)
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string)
 mode: partials
 outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -71,17 +71,17 @@ null sort order: z
 sort order: +
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
 keys: KEY._col0 (type: string)
 mode: final
 outputColumnNames: _col0
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -90,7 +90,7 @@ Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: c1
- Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(c1, 'hll')
 minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/groupby6_noskew.q.out ql/src/test/results/clientpositive/groupby6_noskew.q.out
index 689775e..9545ca3 100644
--- ql/src/test/results/clientpositive/groupby6_noskew.q.out
+++ ql/src/test/results/clientpositive/groupby6_noskew.q.out
@@ -47,10 +47,10 @@ keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -59,7 +59,7 @@ Select Operator
 expressions: _col0 (type: string)
 outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -92,7 +92,7 @@ Reduce Output Operator
 null sort order:
 sort order:
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
 value expressions: c1 (type: string)
 Execution mode: vectorized
 Reduce Operator Tree:
diff --git ql/src/test/results/clientpositive/groupby8_map_skew.q.out ql/src/test/results/clientpositive/groupby8_map_skew.q.out
index 8191963..66719b3 100644
--- ql/src/test/results/clientpositive/groupby8_map_skew.q.out
+++ ql/src/test/results/clientpositive/groupby8_map_skew.q.out
@@ -59,13 +59,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: string), value (type: string)
 outputColumnNames: key, value
@@ -76,7 +76,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -204,7 +204,7 @@ null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
diff --git ql/src/test/results/clientpositive/groupby9.q.out ql/src/test/results/clientpositive/groupby9.q.out
index d46ca78..0170f97 100644
--- ql/src/test/results/clientpositive/groupby9.q.out
+++ ql/src/test/results/clientpositive/groupby9.q.out
@@ -57,13 +57,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: string), value (type: string)
 outputColumnNames: key, value
@@ -74,7 +74,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -176,7 +176,7 @@ null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
@@ -944,13 +944,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: string), value (type: string)
 outputColumnNames: key, value
@@ -961,7 +961,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -1063,7 +1063,7 @@ null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
@@ -1831,13 +1831,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: string), value (type: string)
 outputColumnNames: key, value
@@ -1848,7 +1848,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -1950,7 +1950,7 @@ null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
@@ -3608,13 +3608,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 250 Data size: 69750 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 250 Data size: 45250 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: key (type: string), value (type: string)
 outputColumnNames: key, value
@@ -3625,7 +3625,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -3727,7 +3727,7 @@ null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
diff --git ql/src/test/results/clientpositive/groupby_map_ppr.q.out ql/src/test/results/clientpositive/groupby_map_ppr.q.out
index afdb705..952f310 100644
--- ql/src/test/results/clientpositive/groupby_map_ppr.q.out
+++ ql/src/test/results/clientpositive/groupby_map_ppr.q.out
@@ -57,13 +57,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 93500 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 500 Data size: 192000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 93500 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col3 (type: double)
 auto parallelism: false
@@ -178,17 +178,17 @@ keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -219,7 +219,7 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
 outputColumnNames: key, c1, c2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
 minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
index a0a0ea4..bd43f54 100644
--- ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
+++ ql/src/test/results/clientpositive/groupby_map_ppr_multi_distinct.q.out
@@ -57,13 +57,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6
- Statistics: Num rows: 1000 Data size: 491000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 294000 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
 null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 1000 Data size: 491000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 294000 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col4 (type: double)
 auto parallelism: false
@@ -178,17 +178,17 @@ keys: KEY._col0 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -219,7 +219,7 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
 outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll'), compute_stats(c3, 'hll'), compute_stats(c4, 'hll')
 minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
index 8fa21f2..756c179 100644
--- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
+++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out
@@ -94,14 +94,14 @@ keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -109,20 +109,20 @@ name: default.dest_g4
 Filter Operator
 predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -130,20 +130,20 @@ name: default.dest_g2
 Filter Operator
 predicate: (KEY._col0 < 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -349,14 +349,14 @@ keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -364,20 +364,20 @@ name: default.dest_g4
 Filter Operator
 predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -385,20 +385,20 @@ name: default.dest_g2
 Filter Operator
 predicate: (KEY._col0 < 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -467,11 +467,11 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -480,20 +480,20 @@ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 166 Data size: 76194 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 43326 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0)
 keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 166 Data size: 65072 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 166 Data size: 63080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46646 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 166 Data size: 63080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 166 Data size: 46646 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -508,7 +508,7 @@ key expressions: _col0 (type: string), _col1 (type: bigint)
 null sort order: zz
 sort order: ++
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE
 TopN Hash Memory Usage: 0.1
 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint)
 Execution mode: vectorized
@@ -516,17 +516,17 @@ Select Operator
 expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE
 Limit
 Number of rows: 10
- Statistics: Num rows: 10 Data size: 3920 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 2930 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 10 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 2810 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 10 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 2810 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
index e4bc26e..d151470 100644
--- ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
+++ ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out
@@ -63,20 +63,20 @@ Statistics: Num rows: 332 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (KEY._col0 >= 5) (type: boolean)
- Statistics: Num rows: 110 Data size: 30250 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0)
 keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int)
 outputColumnNames: _col0, _col1
- Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -85,7 +85,7 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: int)
 outputColumnNames: key, c1
- Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll')
 minReductionHashAggr: 0.99
@@ -100,20 +100,20 @@ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
 Filter Operator
 predicate: (KEY._col0 < 5) (type: boolean)
- Statistics: Num rows: 110 Data size: 30250 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0)
 keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 105 Data size: 21000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 10605 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -122,7 +122,7 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int)
 outputColumnNames: key, c1, c2
- Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll')
 minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
index 19ae138..46d34fb 100644
--- ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
+++ ql/src/test/results/clientpositive/groupby_nocolumnalign.q.out
@@ -41,13 +41,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string)
 null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 500 Data size: 185000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 136000 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Operator Tree:
 Group By Operator
 aggregations: count(DISTINCT KEY._col2:0._col0)
diff --git ql/src/test/results/clientpositive/groupby_position.q.out ql/src/test/results/clientpositive/groupby_position.q.out
index f52623a..af0c243 100644
--- ql/src/test/results/clientpositive/groupby_position.q.out
+++ ql/src/test/results/clientpositive/groupby_position.q.out
@@ -56,13 +56,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 20) (type: boolean)
 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
@@ -72,7 +72,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -174,7 +174,7 @@ null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
@@ -182,14 +182,14 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: UDFToInteger(_col0) (type: int), _col1 (type: string), CAST( _col2 AS STRING) (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -198,7 +198,7 @@ Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
 outputColumnNames: key, val1, val2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
 minReductionHashAggr: 0.99
@@ -349,13 +349,13 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string)
- Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15023 Basic stats: COMPLETE Column stats: COMPLETE
 Filter Operator
 predicate: (key < 20) (type: boolean)
 Statistics: Num rows: 166 Data size: 29548 Basic stats: COMPLETE Column stats: COMPLETE
@@ -365,7 +365,7 @@ minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
@@ -467,7 +467,7 @@ null sort order: zzz
 sort order: +++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 166 Data size: 61420 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 22576 Basic stats: COMPLETE Column stats: COMPLETE
 Execution mode: vectorized
 Reduce Operator Tree:
 Group By Operator
@@ -475,14 +475,14 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 30876 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 15438 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: UDFToInteger(_col1) (type: int), _col0 (type: string), CAST( _col2 AS STRING) (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
@@ -491,7 +491,7 @@ Select Operator
 expressions: _col0 (type: int), _col1 (type: string), _col2 (type: string)
 outputColumnNames: key, val1, val2
- Statistics: Num rows: 166 Data size: 46314 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 83 Data size: 23157 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(val1, 'hll'), compute_stats(val2, 'hll')
 minReductionHashAggr: 0.99
diff --git ql/src/test/results/clientpositive/groupby_ppr.q.out ql/src/test/results/clientpositive/groupby_ppr.q.out
index 085ac2c..d7549d9 100644
--- ql/src/test/results/clientpositive/groupby_ppr.q.out
+++ ql/src/test/results/clientpositive/groupby_ppr.q.out
@@ -171,17 +171,17 @@ keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -212,7 +212,7 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string)
 outputColumnNames: key, c1, c2
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
@@ -279,7 +279,7 @@ Reduce Output Operator
 null sort order:
 sort order:
- Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: key (type: string), c1 (type: int), c2 (type: string)
 auto parallelism: false
diff --git ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
index 0a7275f..95f95b0 100644
--- ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
+++ ql/src/test/results/clientpositive/groupby_ppr_multi_distinct.q.out
@@ -171,17 +171,17 @@ keys: KEY._col0 (type: string)
 mode: complete
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int)
 outputColumnNames: _col0, _col1, _col2, _col3, _col4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.TextInputFormat
@@ -212,7 +212,7 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string), _col3 (type: int), _col4 (type: int)
 outputColumnNames: key, c1, c2, c3, c4
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
@@ -279,7 +279,7 @@ Reduce Output Operator
 null sort order:
 sort order:
- Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: key (type: string), c1 (type: int), c2 (type: string), c3 (type: int), c4 (type: int)
 auto parallelism: false
diff --git ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
index ed969fa..9b869a1 100644
--- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
+++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out
@@ -419,15 +419,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12
 STAGE DEPENDENCIES:
 Stage-1 is a root stage
- Stage-2 depends on stages: Stage-1
- Stage-4 depends on stages: Stage-0, Stage-2
- Stage-3 depends on stages: Stage-1
- Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7
- Stage-6
- Stage-0 depends on stages: Stage-6, Stage-5, Stage-8
+ Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
+ Stage-4
+ Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+ Stage-2 depends on stages: Stage-0, Stage-8
+ Stage-3
 Stage-5
- Stage-7
- Stage-8 depends on stages: Stage-7
+ Stage-6 depends on stages: Stage-5
+ Stage-8 depends on stages: Stage-1
 STAGE PLANS:
 Stage: Stage-1
@@ -466,100 +465,37 @@ Select Operator
 expressions: _col0 (type: string), CAST( _col1 AS STRING) (type: string), if(((UDFToDouble(_col0) % 100.0D) = 0.0D), '11', '12') (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 112812 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 316 Data size: 112812 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+ name: default.test_table_n8
 Select Operator
 expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
 outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 316 Data size: 173484 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 142516 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
 keys: ds (type: string), hr (type: string)
 minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
 serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- File Output Operator
- compressed: false
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
- Stage: Stage-2
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col0 (type: string), _col1 (type: string)
- null sort order: zz
- sort order: ++
- Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col2 (type: struct), _col3 (type: struct)
- Execution mode: vectorized
- Reduce Operator Tree:
- Group By Operator
- aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
- keys: KEY._col0 (type: string), KEY._col1 (type: string)
- mode: mergepartial
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- Select Operator
- expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
- outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- File Output Operator
- compressed: false
- Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.mapred.SequenceFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
-
- Stage: Stage-4
- Stats Work
- Basic Stats Work:
- Column Stats Desc:
- Columns: key, value
- Column Types: string, string
- Table: default.test_table_n8
-
- Stage: Stage-3
- Map Reduce
- Map Operator Tree:
- TableScan
- Reduce Output Operator
- key expressions: _col2 (type: string)
- null sort order: a
- sort order: +
- Map-reduce partition columns: _col2 (type: string)
- Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
- value expressions: _col0 (type: string), _col1 (type: string)
- Execution mode: vectorized
- Reduce Operator Tree:
- Select Operator
- expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string)
- outputColumnNames: _col0, _col1, _col2
- File Output Operator
- compressed: false
- Dp Sort State: PARTITION_SORTED
- Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
- table:
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
- serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
- name: default.test_table_n8
-
- Stage: Stage-9
+ Stage: Stage-7
 Conditional Operator
- Stage: Stage-6
+ Stage: Stage-4
 Move Operator
 files:
 hdfs directory: true
@@ -578,6 +514,21 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
 name: default.test_table_n8
+
+ Stage: Stage-2
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: key, value
+ Column Types: string, string
+ Table: default.test_table_n8
+
+ Stage: Stage-3
+ Merge File Operator
+ Map Operator Tree:
+ RCFile Merge Operator
+ merge level: block
+ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
 Stage: Stage-5
 Merge File Operator
 Map Operator Tree:
@@ -585,19 +536,43 @@ merge level: block
 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
- Stage: Stage-7
- Merge File Operator
- Map Operator Tree:
- RCFile Merge Operator
- merge level: block
- input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-
- Stage: Stage-8
+ Stage: Stage-6
 Move Operator
 files:
 hdfs directory: true
 #### A masked pattern was here ####
+ Stage: Stage-8
+ Map Reduce
+ Map Operator Tree:
+ TableScan
+ Reduce Output Operator
+ key expressions: _col0 (type: string), _col1 (type: string)
+ null sort order: zz
+ sort order: ++
+ Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col2 (type: struct), _col3 (type: struct)
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+ keys: KEY._col0 (type: string), KEY._col1 (type: string)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: INSERT OVERWRITE TABLE test_table_n8 PARTITION (ds = '2008-04-08', hr)
 SELECT key, value, IF (key % 100 == 0, '11', '12')
 FROM (SELECT key, COUNT(*) AS value FROM srcpart
diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
index f88f5bf..3ec1d32 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -49,14 +49,14 @@ Select Operator
 expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
 Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
 outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
 keys: ds (type: string), hr (type: string)
 minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col2 (type: struct), _col3 (type: struct)
 auto parallelism: false
@@ -212,17 +212,17 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -430,14 +430,14 @@ Select Operator
 expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
 Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -465,20 +465,20 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
 outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
 keys: ds (type: string), hr (type: string)
 minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col2 (type: struct), _col3 (type: struct)
 auto parallelism: false
@@ -593,17 +593,17 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
index 43804d6..c41ae71 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
@@ -49,14 +49,14 @@ Select Operator
 expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
 Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@ Select Operator
 expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
 outputColumnNames: key, value, ds, hr
- Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
 Group By Operator
 aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
 keys: ds (type: string), hr (type: string)
 minReductionHashAggr: 0.99
 mode: hash
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 Reduce Output Operator
 key expressions: _col0 (type: string), _col1 (type: string)
 null sort order: zz
 sort order: ++
 Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 tag: -1
 value expressions: _col2 (type: struct), _col3 (type: struct)
 auto parallelism: false
@@ -212,17 +212,17 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string)
 mode: mergepartial
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 Select Operator
 expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
 outputColumnNames: _col0, _col1, _col2, _col3
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 0
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
- Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -430,14 +430,14 @@ Select Operator
 expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
 outputColumnNames: _col0, _col1, _col2
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 File Output Operator
 compressed: false
 GlobalTableId: 1
 #### A masked pattern was here ####
 NumFilesPerFileSink: 1
 Static Partition Specification: ds=2008-04-08/
- Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
 #### A masked pattern was here ####
 table:
 input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -465,20 +465,20 @@ Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false @@ -593,17 +593,17 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out index 8551f57..2b82c86 100644 --- ql/src/test/results/clientpositive/list_bucket_dml_8.q.out +++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out @@ -49,14 +49,14 @@ Select Operator expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 1 #### A masked pattern was here #### NumFilesPerFileSink: 1 Static Partition Specification: ds=2008-04-08/ - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat @@ -84,20 +84,20 @@ Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' 
(type: string), _col2 (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 value expressions: _col2 (type: struct), _col3 (type: struct) auto parallelism: false @@ -212,17 +212,17 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out index 2276b99..81069fd 100644 --- ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out +++ ql/src/test/results/clientpositive/llap/count_dist_rewrite.q.out @@ -763,13 +763,13 @@ minReductionHashAggr: 0.5 mode: hash outputColumnNames: _col0, _col1, _col2, _col4, _col5, _col6, _col7 - Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: double), _col2 (type: bigint), _col4 (type: string), _col5 (type: string), _col6 (type: double), _col7 (type: double) Execution mode: llap LLAP IO: no inputs @@ -781,7 +781,7 @@ keys: KEY._col0 (type: string) mode: partial2 outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 250 Data size: 146000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 250 Data size: 121500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col1), count(_col2), count(_col0), max(_col3), min(_col4), sum(_col5), sum(_col6) mode: partial2 diff --git ql/src/test/results/clientpositive/llap/explainuser_2.q.out ql/src/test/results/clientpositive/llap/explainuser_2.q.out index 738bf44..3613e12 100644 --- ql/src/test/results/clientpositive/llap/explainuser_2.q.out +++ ql/src/test/results/clientpositive/llap/explainuser_2.q.out @@ -3564,7 +3564,7 @@ <-Reducer 4 [SIMPLE_EDGE] llap PARTITION_ONLY_SHUFFLE [RS_17] PartitionCols:_col0 - Group By Operator [GBY_16] (rows=1 width=280) + Group By Operator [GBY_16] (rows=1 width=275) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Group By Operator [GBY_13] (rows=1 width=272) Output:["_col0","_col1"],keys:KEY._col0, KEY._col1 @@ -3612,7 +3612,7 @@ Output:["key","val1","val2"] Select Operator [SEL_33] (rows=1 width=456) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_32] (rows=1 width=464) + Group By Operator [GBY_32] (rows=1 width=459) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1 Please refer to the previous Group By Operator [GBY_13] Stage-5 @@ -3787,7 +3787,7 @@ <-Map 8 [CONTAINS] llap Reduce Output Operator [RS_52] PartitionCols:_col0 - Group By Operator [GBY_50] (rows=1 width=280) + Group By Operator [GBY_50] (rows=1 width=275) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Select Operator [SEL_48] (rows=500 width=10) Output:["_col0","_col1"] @@ -3795,13 +3795,13 @@ Output:["key","value"] Reduce Output Operator [RS_53] PartitionCols:_col0, _col1 - Group By Operator [GBY_51] (rows=1 width=464) + Group By Operator [GBY_51] (rows=1 width=459) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_48] <-Map 9 [CONTAINS] llap Reduce Output Operator [RS_59] PartitionCols:_col0 - Group By Operator [GBY_57] (rows=1 width=280) + Group By Operator [GBY_57] (rows=1 width=275) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Select Operator [SEL_55] (rows=500 width=10) Output:["_col0","_col1"] @@ -3809,13 +3809,13 @@ Output:["key","value"] Reduce Output Operator [RS_60] PartitionCols:_col0, _col1 - Group By Operator [GBY_58] (rows=1 width=464) + Group By Operator [GBY_58] (rows=1 width=459) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_55] <-Reducer 2 [CONTAINS] llap Reduce Output Operator [RS_45] PartitionCols:_col0 - Group By Operator [GBY_43] (rows=1 width=280) + Group By Operator [GBY_43] (rows=1 width=275) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Select Operator [SEL_41] (rows=1 width=272) Output:["_col0","_col1"] @@ -3830,7 +3830,7 @@ default@src,s1,Tbl:COMPLETE,Col:COMPLETE Reduce Output Operator [RS_46] PartitionCols:_col0, _col1 - Group By Operator [GBY_44] (rows=1 width=464) + Group By Operator [GBY_44] (rows=1 width=459) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_41] 
PARTITION_ONLY_SHUFFLE [RS_22] @@ -3923,7 +3923,7 @@ <-Map 8 [CONTAINS] llap Reduce Output Operator [RS_50] PartitionCols:_col0 - Group By Operator [GBY_48] (rows=1 width=280) + Group By Operator [GBY_48] (rows=1 width=275) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Select Operator [SEL_46] (rows=500 width=10) Output:["_col0","_col1"] @@ -3931,13 +3931,13 @@ Output:["key","value"] Reduce Output Operator [RS_51] PartitionCols:_col0, _col1 - Group By Operator [GBY_49] (rows=1 width=464) + Group By Operator [GBY_49] (rows=1 width=459) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_46] <-Reducer 2 [CONTAINS] llap Reduce Output Operator [RS_43] PartitionCols:_col0 - Group By Operator [GBY_41] (rows=1 width=280) + Group By Operator [GBY_41] (rows=1 width=275) Output:["_col0","_col1","_col2"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, substr(_col1, 5) Select Operator [SEL_39] (rows=1 width=272) Output:["_col0","_col1"] @@ -3952,7 +3952,7 @@ default@src,s1,Tbl:COMPLETE,Col:COMPLETE Reduce Output Operator [RS_44] PartitionCols:_col0, _col1 - Group By Operator [GBY_42] (rows=1 width=464) + Group By Operator [GBY_42] (rows=1 width=459) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(DISTINCT substr(_col1, 5))"],keys:_col0, _col1, substr(_col1, 5) Please refer to the previous Select Operator [SEL_39] PARTITION_ONLY_SHUFFLE [RS_20] diff --git ql/src/test/results/clientpositive/llap/groupby2.q.out ql/src/test/results/clientpositive/llap/groupby2.q.out index 848b503..bdad787 100644 --- ql/src/test/results/clientpositive/llap/groupby2.q.out +++ ql/src/test/results/clientpositive/llap/groupby2.q.out @@ -60,13 +60,13 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 50500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 100000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 50500 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: bigint), _col2 (type: double) Reducer 3 Execution mode: llap @@ -76,14 +76,14 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 63200 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 31916 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -92,12 +92,12 @@ Select 
Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: string) outputColumnNames: key, c1, c2 - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 117552 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 86268 Basic stats: COMPLETE Column stats: COMPLETE value expressions: key (type: string), c1 (type: int), c2 (type: string) Reducer 4 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out index 7b6bd1f..54dc91e 100644 --- ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out +++ ql/src/test/results/clientpositive/llap/tez_union_multiinsert.q.out @@ -103,26 +103,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 9 @@ -140,26 +140,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output 
Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -180,26 +180,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -1082,26 +1082,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -1139,26 +1139,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 
Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -1281,26 +1281,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 @@ -2077,26 +2077,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: 
string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Map 7 @@ -2134,26 +2134,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 3 @@ -2276,26 +2276,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 140000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 137500 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num 
rows: 1001 Data size: 464464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1001 Data size: 459459 Basic stats: COMPLETE Column stats: COMPLETE Union 2 Vertex: Union 2 @@ -3083,26 +3083,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: llap LLAP IO: no inputs Reducer 2 @@ -3123,26 +3123,26 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE Reducer 4 Execution mode: llap Reduce Operator Tree: @@ -4072,19 +4072,19 @@ minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 125 Data size: 35000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 34375 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz 
sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 125 Data size: 35000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 34375 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 116000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 114750 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: string), CAST( _col2 AS STRING) (type: string) outputColumnNames: _col0, _col1, _col2 diff --git ql/src/test/results/clientpositive/llap/udf_coalesce.q.out ql/src/test/results/clientpositive/llap/udf_coalesce.q.out index f35ec7a..a934fdc 100644 --- ql/src/test/results/clientpositive/llap/udf_coalesce.q.out +++ ql/src/test/results/clientpositive/llap/udf_coalesce.q.out @@ -61,20 +61,39 @@ POSTHOOK: Input: default@src #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Row Limit Per Split: 1 + Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: 1 (type: int), 1 (type: int), 2 (type: int), 1 (type: int), 3 (type: int), 4 (type: int), '1' (type: string), '1' (type: string), '2' (type: string), '1' (type: string), '3' (type: string), '4' (type: string), 1 (type: decimal(1,0)), 1 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), null (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 + Statistics: Num rows: 500 Data size: 547004 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 500 Data size: 547004 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src - Row Limit Per Split: 1 - Select Operator - expressions: 1 (type: int), 1 (type: int), 2 (type: int), 1 (type: int), 3 (type: int), 4 (type: int), '1' (type: string), '1' (type: string), '2' (type: string), '1' (type: string), '3' (type: string), '4' (type: string), 1 (type: decimal(1,0)), 1 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), null (type: int) - outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17 - ListSink + ListSink PREHOOK: query: SELECT COALESCE(1), COALESCE(1, 2), @@ -122,6 +141,53 @@ #### A masked pattern was here #### 1 1 2 1 3 4 1 1 2 1 3 4 1 1 2 2 2 NULL PREHOOK: query: EXPLAIN +SELECT COALESCE(key,'x') from src limit 1 +PREHOOK: type: QUERY +PREHOOK: Input: default@src +#### A masked pattern was here #### +POSTHOOK: query: EXPLAIN +SELECT COALESCE(key,'x') from src limit 1 +POSTHOOK: type: 
QUERY +POSTHOOK: Input: default@src +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: COALESCE(key,'x') (type: string) + outputColumnNames: _col0 + Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE + Limit + Number of rows: 1 + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + + Stage: Stage-0 + Fetch Operator + limit: 1 + Processor Tree: + ListSink + +PREHOOK: query: EXPLAIN SELECT COALESCE(src_thrift.lint[1], 999), COALESCE(src_thrift.lintstring[0].mystring, '999'), COALESCE(src_thrift.mstringstring['key_2'], '999') @@ -138,19 +204,38 @@ POSTHOOK: Input: default@src_thrift #### A masked pattern was here #### STAGE DEPENDENCIES: - Stage-0 is a root stage + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 STAGE PLANS: + Stage: Stage-1 + Tez +#### A masked pattern was here #### + Vertices: + Map 1 + Map Operator Tree: + TableScan + alias: src_thrift + Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string) + outputColumnNames: _col0, _col1, _col2 + Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized, llap + LLAP IO: no inputs + Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: - TableScan - alias: src_thrift - Select Operator - expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string) - outputColumnNames: _col0, _col1, _col2 - ListSink + ListSink PREHOOK: query: SELECT COALESCE(src_thrift.lint[1], 999), COALESCE(src_thrift.lintstring[0].mystring, '999'), diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out index b81c166..339c7a2 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -216,13 +216,13 @@ native: true projectedOutputColumnNums: [4, 21, 26, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 58, 61, 64, 67] selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') 
END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 22:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 23:boolean, LongColLessLongScalar(col 4:int, val 10) -> 24:boolean, LongColLessLongScalar(col 4:int, val 100) -> 25:boolean) -> 26:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 27:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 28:boolean, LongColLessLongScalar(col 4:int, val 10) -> 29:boolean) -> 30:string, IfExprLongColumnLongColumn(col 31:boolean, col 32:date, col 33:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 31:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 32:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 33:date) -> 34:date, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 35:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 36:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 36:double) -> 37:double) -> 38:double, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 39:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 40:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 40:double) -> 41:double) -> 42:double, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), null, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 43:boolean) -> 44:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, null))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 45:boolean) -> 46:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 47:boolean) -> 48:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 49:boolean) -> 50:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 51:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 51:boolean) -> 52:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 53:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 53:boolean) -> 54:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 55:boolean, col 56:timestampcol 57:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 55:boolean, CastDateToTimestamp(col 12:date) -> 56:timestamp, CastDateToTimestamp(col 11:date) -> 
57:timestamp) -> 58:timestamp, VectorUDFAdaptor(if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 59:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 60:int) -> 61:int, VectorUDFAdaptor(if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 62:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 63:int) -> 64:int, IfExprLongScalarLongScalar(col 66:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 65:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 65:int) -> 66:boolean) -> 67:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -552,14 +552,14 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 24, 33, 40, 44, 48, 52, 54, 56, 58, 60, 62, 64, 68, 71, 74, 77] + selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 25:boolean, val Singlecol 32:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 25:boolean, IfExprStringScalarStringGroupColumn(col 26:boolean, val Twocol 31:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 26:boolean, IfExprStringScalarStringGroupColumn(col 27:boolean, val Somecol 30:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 27:boolean, IfExprColumnNull(col 28:boolean, col 29:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 28:boolean, ConstantVectorExpression(val Many) -> 29:string) -> 30:string) -> 31:string) -> 32:string) -> 33:string, IfExprStringScalarStringGroupColumn(col 34:boolean, val Singlecol 39:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 34:boolean, IfExprStringScalarStringGroupColumn(col 35:boolean, val Twocol 38:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 35:boolean, IfExprColumnNull(col 36:boolean, col 37:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 36:boolean, ConstantVectorExpression(val Some) -> 37:string) -> 38:string) -> 39:string) -> 40:string, IfExprLongColumnLongColumn(col 41:boolean, col 42:date, col 43:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 41:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 42:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 43:date)
-> 44:date, IfExprDoubleColumnDoubleScalar(col 45:boolean, col 47:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 45:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 46:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 46:double) -> 47:double) -> 48:double, IfExprDoubleColumnDoubleScalar(col 49:boolean, col 51:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 49:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 50:double) -> 51:double) -> 52:double, IfExprNullColumn(col 53:boolean, null, col 78)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 53:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 78:decimal(10,2)) -> 54:decimal(10,2), IfExprColumnNull(col 55:boolean, col 79:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 55:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 79:decimal(10,2)) -> 56:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 57:boolean) -> 58:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 59:boolean) -> 60:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 61:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 61:boolean) -> 62:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 63:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 63:boolean) -> 64:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 65:boolean, col 66:timestampcol 67:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 65:boolean, CastDateToTimestamp(col 12:date) -> 66:timestamp, CastDateToTimestamp(col 11:date) -> 67:timestamp) -> 68:timestamp, IfExprColumnNull(col 69:boolean, col 70:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 69:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 70:int) -> 71:int, IfExprNullColumn(col 72:boolean, null, col 73)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 72:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 73:int) -> 74:int, IfExprLongScalarLongScalar(col 76:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 75:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 75:int) -> 76:boolean) -> 77:date + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE - selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int,
val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 25:boolean, val Singlecol 32:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 25:boolean, IfExprStringScalarStringGroupColumn(col 26:boolean, val Twocol 31:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 26:boolean, IfExprStringScalarStringGroupColumn(col 27:boolean, val Somecol 30:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 27:boolean, IfExprColumnNull(col 28:boolean, col 29:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 28:boolean, ConstantVectorExpression(val Many) -> 29:string) -> 30:string) -> 31:string) -> 32:string) -> 33:string, IfExprStringScalarStringGroupColumn(col 34:boolean, val Singlecol 39:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 34:boolean, IfExprStringScalarStringGroupColumn(col 35:boolean, val Twocol 38:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 35:boolean, IfExprColumnNull(col 36:boolean, col 37:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 36:boolean, ConstantVectorExpression(val Some) -> 37:string) -> 38:string) -> 39:string) -> 40:string, IfExprLongColumnLongColumn(col 41:boolean, col 42:date, col 43:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 41:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 42:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 43:date) -> 44:date, IfExprDoubleColumnDoubleScalar(col 45:boolean, col 47:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 45:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 46:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 46:double) -> 47:double) -> 48:double, IfExprDoubleColumnDoubleScalar(col 49:boolean, col 51:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 49:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 50:double) -> 51:double) -> 52:double, IfExprNullColumn(col 53:boolean, null, col 7)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 53:boolean, col 7:decimal(10,2)/DECIMAL_64) -> 54:decimal(10,2)/DECIMAL_64, IfExprColumnNull(col 55:boolean, col 7:decimal(10,2)/DECIMAL_64, null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 55:boolean, col 7:decimal(10,2)/DECIMAL_64) -> 56:decimal(10,2)/DECIMAL_64, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 57:boolean) -> 58:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 59:boolean) -> 60:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 61:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 61:boolean) -> 62:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 63:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 63:boolean) -> 64:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 65:boolean, col 66:timestampcol 67:timestamp)(children:
LongColGreaterLongScalar(col 1:int, val 30) -> 65:boolean, CastDateToTimestamp(col 12:date) -> 66:timestamp, CastDateToTimestamp(col 11:date) -> 67:timestamp) -> 68:timestamp, IfExprColumnNull(col 69:boolean, col 70:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 69:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 70:int) -> 71:int, IfExprNullColumn(col 72:boolean, null, col 73)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 72:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 73:int) -> 74:int, IfExprLongScalarLongScalar(col 76:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 75:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 75:int) -> 76:boolean) -> 77:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -889,14 +894,14 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 27, 39, 48, 52, 57, 62, 64, 66, 71, 76, 78, 80, 84, 87, 90, 93] + selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 38:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 28:boolean, ConstantVectorExpression(val Single) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 30:boolean, ConstantVectorExpression(val Two) -> 31:string, IfExprColumnCondExpr(col 32:boolean, col 33:stringcol 36:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 32:boolean, ConstantVectorExpression(val Some) -> 33:string, IfExprColumnNull(col 34:boolean, col 35:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 34:boolean, ConstantVectorExpression(val Many) -> 35:string) -> 36:string) -> 37:string) -> 38:string) -> 39:string, IfExprColumnCondExpr(col 40:boolean, col 41:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 40:boolean, ConstantVectorExpression(val Single) -> 41:string, IfExprColumnCondExpr(col 42:boolean, col 43:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 42:boolean, ConstantVectorExpression(val Two) -> 43:string, IfExprColumnNull(col 44:boolean, col 45:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 44:boolean, ConstantVectorExpression(val Some) -> 45:string)
-> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprCondExprColumn(col 53:boolean, col 55:double, col 56:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 53:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 55:double, ConstantVectorExpression(val 0.0) -> 56:double) -> 57:double, IfExprCondExprColumn(col 58:boolean, col 60:double, col 61:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 58:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 59:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 59:double) -> 60:double, ConstantVectorExpression(val 0.0) -> 61:double) -> 62:double, IfExprNullColumn(col 63:boolean, null, col 94)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 63:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 94:decimal(10,2)) -> 64:decimal(10,2), IfExprColumnNull(col 65:boolean, col 95:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 65:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 95:decimal(10,2)) -> 66:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 70:boolean) -> 71:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 75:boolean) -> 76:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 77:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 77:boolean) -> 78:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 79:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 79:boolean) -> 80:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 81:boolean, col 82:timestampcol 83:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 81:boolean, CastDateToTimestamp(col 12:date) -> 82:timestamp, CastDateToTimestamp(col 11:date) -> 83:timestamp) -> 84:timestamp, IfExprCondExprNull(col 85:boolean, col 86:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 85:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 86:int) -> 87:int, IfExprNullCondExpr(col 88:boolean, null, col 89:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 88:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 89:int) -> 90:int, IfExprLongScalarLongScalar(col 92:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 91:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 91:int) -> 92:boolean) -> 93:date + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index bcc7dd2..cc72f45 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -151,7 +151,7 @@ native: true projectedOutputColumnNums: [1, 3, 9, 14, 18, 22, 24, 27, 32, 38, 2] selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN
('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 10:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 11:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 12:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 13:boolean) -> 14:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 16:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 17:boolean) -> 18:string, IfExprLongColumnLongColumn(col 19:boolean, col 20:int, col 21:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 19:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 20:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 21:int) -> 22:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE ('2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 23:boolean) -> 24:string, VectorUDFAdaptor(if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)))(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 25:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 26:int) -> 27:int, VectorUDFAdaptor(if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null))(children: ColAndCol(col 28:boolean, col 29:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 28:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 29:boolean) -> 30:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 31:int) -> 32:int, IfExprLongColumnLongColumn(col 35:boolean, col 36:date, col 37:date)(children: DoubleColGreaterDoubleScalar(col 34:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 33:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 33:double) -> 34:double) -> 35:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 36:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 37:date) -> 38:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -162,7 +162,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 9:string, 14:string, 18:string, 22:int, 24:string, 27:int, 32:int, 38:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,13 +204,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -441,7 +441,7 @@ native: true projectedOutputColumnNums: [1, 3, 12, 21, 28, 32, 34, 37, 42, 48, 2] selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val Oldcol 20:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 13:boolean, IfExprStringScalarStringGroupColumn(col 14:boolean, val Early 2000scol 19:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 14:boolean, IfExprStringScalarStringGroupColumn(col 15:boolean, val Late 2000scol 18:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 15:boolean, IfExprColumnNull(col 16:boolean, col 17:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Early 2010s) -> 17:string) -> 18:string) -> 19:string) -> 20:string) -> 21:string, IfExprStringScalarStringGroupColumn(col 22:boolean, val Oldcol 27:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 22:boolean, IfExprStringScalarStringGroupColumn(col 23:boolean, val Early 2000scol 26:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 23:boolean, IfExprColumnNull(col 24:boolean, col 
25:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 24:boolean, ConstantVectorExpression(val Late 2000s) -> 25:string) -> 26:string) -> 27:string) -> 28:string, IfExprLongColumnLongColumn(col 29:boolean, col 30:int, col 31:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 29:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 30:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 31:int) -> 32:int, IfExprStringGroupColumnStringScalar(col 33:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 33:boolean) -> 34:string, IfExprNullColumn(col 35:boolean, null, col 36)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 35:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 36:int) -> 37:int, IfExprColumnNull(col 40:boolean, col 41:int, null)(children: ColAndCol(col 38:boolean, col 39:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 38:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 39:boolean) -> 40:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 41:int) -> 42:int, IfExprLongColumnLongColumn(col 45:boolean, col 46:date, col 47:date)(children: DoubleColGreaterDoubleScalar(col 44:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 43:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 43:double) -> 44:double) -> 45:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 46:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 47:date) -> 48:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -452,7 +452,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 12:string, 21:string, 28:string, 32:int, 34:string, 37:int, 42:int, 48:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs @@ -494,13 +494,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat 
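
Note on the vector_case_when_2 deltas (51 rows: 50745 -> 50345 and 45084 -> 44684): with hive.stats.use.statestimators enabled, a CASE/IF projection no longer falls back to a default column width; StatsUtils asks the UDF for a stat estimator and derives the result's ColStatistics from the children's statistics. Below is a minimal sketch of what such an estimator for if(cond, a, b) might look like, assuming only the estimate(List<ColStatistics>) shape visible at the call site; the merge policy here (widest branch, summed NDV and null counts) is an illustration, not necessarily what the built-in estimators implement.

import java.util.List;
import java.util.Optional;

import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator;

/** Illustrative only: a stat estimator for if(cond, thenExpr, elseExpr). */
public class IfStatEstimatorSketch implements IStatEstimator {

  @Override
  public Optional<ColStatistics> estimate(List<ColStatistics> csList) {
    // children arrive as [condition, thenExpr, elseExpr]
    if (csList.size() != 3 || csList.get(1) == null || csList.get(2) == null) {
      // missing child stats: decline, so the caller keeps its default path
      return Optional.empty();
    }
    ColStatistics thenCs = csList.get(1);
    ColStatistics elseCs = csList.get(2);
    ColStatistics result = new ColStatistics();
    // the result is never wider than the wider branch
    result.setAvgColLen(Math.max(thenCs.getAvgColLen(), elseCs.getAvgColLen()));
    // at most the union of the branches' distinct values
    result.setCountDistint(thenCs.getCountDistint() + elseCs.getCountDistint());
    result.setNumNulls(thenCs.getNumNulls() + elseCs.getNumNulls());
    return Optional.of(result);
  }
}

Returning Optional.empty() is the estimator's way of declining, which leaves the pre-existing getNDVFor fallback in effect.
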
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -731,7 +731,7 @@ native: true projectedOutputColumnNums: [1, 3, 15, 27, 36, 40, 42, 45, 50, 56, 2] selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 16:boolean, col 17:stringcol 26:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Old) -> 17:string, IfExprColumnCondExpr(col 18:boolean, col 19:stringcol 25:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 18:boolean, ConstantVectorExpression(val Early 2000s) -> 19:string, IfExprColumnCondExpr(col 20:boolean, col 21:stringcol 24:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 20:boolean, ConstantVectorExpression(val Late 2000s) -> 21:string, IfExprColumnNull(col 22:boolean, col 23:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 22:boolean, ConstantVectorExpression(val Early 2010s) -> 23:string) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 28:boolean, ConstantVectorExpression(val Old) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 30:boolean, ConstantVectorExpression(val Early 2000s) -> 31:string, IfExprColumnNull(col 32:boolean, col 33:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 32:boolean, ConstantVectorExpression(val Late 2000s) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, IfExprStringGroupColumnStringScalar(col 41:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 43:boolean, null, col 44:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 43:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 44:int) -> 45:int, 
IfExprCondExprNull(col 48:boolean, col 49:int, null)(children: ColAndCol(col 46:boolean, col 47:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 46:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 47:boolean) -> 48:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 49:int) -> 50:int, IfExprCondExprCondExpr(col 53:boolean, col 54:datecol 55:date)(children: DoubleColGreaterDoubleScalar(col 52:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 51:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 51:double) -> 52:double) -> 53:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 54:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 55:date) -> 56:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -742,7 +742,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 15:string, 27:string, 36:string, 40:int, 42:string, 45:int, 50:int, 56:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs @@ -784,13 +784,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out index e31fe56..f8d6e2d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out @@ -108,7 +108,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -118,7 +118,7 @@ className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS 
true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -139,13 +139,13 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -158,7 +158,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: @@ -167,7 +167,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Reducer 4 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out index 25b6be6..2042f6d 100644 --- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out @@ -108,7 +108,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z @@ -118,7 +118,7 @@ className: VectorReduceSinkStringOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Reducer 3 Execution mode: vectorized, llap Reduce Vectorization: @@ -139,13 +139,13 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 
307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -158,7 +158,7 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0] - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: @@ -167,7 +167,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Reducer 4 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vector_if_expr.q.out ql/src/test/results/clientpositive/llap/vector_if_expr.q.out index 5d955bc..ed73482 100644 --- ql/src/test/results/clientpositive/llap/vector_if_expr.q.out +++ ql/src/test/results/clientpositive/llap/vector_if_expr.q.out @@ -47,7 +47,7 @@ native: true projectedOutputColumnNums: [10, 13] selectExpressions: IfExprStringScalarStringScalar(col 10:boolean, val first, val second) -> 13:string - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean) null sort order: z @@ -56,7 +56,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string) Execution mode: vectorized, llap LLAP IO: all inputs @@ -85,13 +85,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1] - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_nvl.q.out ql/src/test/results/clientpositive/llap/vector_nvl.q.out index 420520e..5346ed4 100644 --- ql/src/test/results/clientpositive/llap/vector_nvl.q.out +++ ql/src/test/results/clientpositive/llap/vector_nvl.q.out @@ -147,19 +147,19 @@ 
native: true projectedOutputColumnNums: [4, 14] selectExpressions: VectorCoalesce(columns [4, 13])(children: col 4:float, ConstantVectorExpression(val 1.0) -> 13:float) -> 14:float - Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out index 956ff47..8ca8a8e 100644 --- ql/src/test/results/clientpositive/llap/vector_udf1.q.out +++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out Binary files differ diff --git ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out index 29e35ec..12e5037 100644 --- ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_string_funcs.q.out @@ -71,10 +71,10 @@ Select Operator expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index 0077f08..0e0058b 100644 --- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -274,7 +274,7 @@ native: true projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, 
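
The vector_groupby4 and vector_groupby6 hunks above decompose cleanly under Data size = Num rows x estimated row width. The single string output column's estimated width drops from 184 to 85 bytes per row:

  before: 500 x 184 = 92000,  316 x 184 = 58144,  307 x 184 = 56488
  after:  500 x 85  = 42500,  316 x 85  = 26860,  307 x 85  = 26095

The 184-byte and 85-byte constants are inferred from the numbers in these hunks, not read out of the code; they are consistent with the old default string width being replaced by an estimator-derived one.
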
VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -283,7 +283,7 @@ className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs @@ -312,13 +312,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out index c55ef0f..8387880 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out @@ -140,14 +140,13 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc_n1@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, 
Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -160,79 +159,56 @@ Select Operator expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 2.0D) = 0.0D), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.merge_dynamic_part_n3 Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct), _col3 (type: struct) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: 
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.merge_dynamic_part_n3 - - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -251,7 +227,7 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part_n3 - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -259,7 +235,14 @@ Column Types: string, string Table: default.merge_dynamic_part_n3 - Stage: Stage-4 + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 Merge File Operator Map Operator Tree: RCFile Merge Operator @@ -267,13 +250,6 @@ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-6 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - - Stage: Stage-7 Move Operator files: hdfs directory: true diff --git ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 251c3f8..518f400 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -116,14 +116,13 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -136,79 +135,56 @@ Select Operator expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 163152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 618 Data size: 163152 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.merge_dynamic_part Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 618 Data size: 281808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 221244 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct), _col3 (type: struct) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: 
org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.merge_dynamic_part - - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -227,7 +203,7 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -235,7 +211,14 @@ Column Types: string, string Table: default.merge_dynamic_part - Stage: Stage-4 + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 Merge File Operator Map Operator Tree: RCFile Merge Operator @@ -243,13 +226,6 @@ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-6 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - - Stage: Stage-7 Move Operator files: hdfs directory: true diff --git ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out index 318e694..b4ff434 100644 --- ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out +++ ql/src/test/results/clientpositive/nullgroup4_multi_distinct.q.out @@ -33,12 +33,12 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 125 Data size: 37375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 25125 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ - Statistics: Num rows: 125 Data size: 37375 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 125 Data size: 25125 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: bigint) Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out index e29ca9d..7e1a2c0 100644 --- ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out +++ ql/src/test/results/clientpositive/offset_limit_global_optimizer.q.out @@ -36,12 +36,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 410 TopN Hash Memory Usage: 0.1 @@ -256,17 +256,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data 
size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Offset of rows: 400 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -354,12 +354,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 500 TopN Hash Memory Usage: 0.1 @@ -574,17 +574,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Offset of rows: 490 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -672,12 +672,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 510 TopN Hash Memory Usage: 0.1 @@ -892,17 +892,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) 
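Reviewer note (illustrative, not part of the patch): the new estimates in offset_limit_global_optimizer.q.out are internally consistent. Every operator now reports Data size = Num rows x 541 bytes (the old golden output used 639 bytes per row), so each Limit operator shrinks proportionally. A quick standalone check of the figures in these hunks, in plain Java:

public class OffsetLimitSizeCheck {
    // Data size in the plan is numRows times the estimated per-row width.
    static long dataSize(long numRows, long bytesPerRow) {
        return numRows * bytesPerRow;
    }

    public static void main(String[] args) {
        long width = 1082000L / 2000L; // 541 bytes/row (previously 1278000 / 2000 = 639)
        // Each Limit operator's new Data size is exactly rows * 541:
        System.out.println(dataSize(10, width));  // 5410
        System.out.println(dataSize(20, width));  // 10820
        System.out.println(dataSize(600, width)); // 324600
    }
}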
outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 490 - Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1000,12 +1000,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 1090 TopN Hash Memory Usage: 0.1 @@ -1220,17 +1220,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 600 Offset of rows: 490 - Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -1903,12 +1903,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 410 TopN Hash Memory Usage: 0.1 @@ -2123,17 +2123,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), 
KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Offset of rows: 400 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2216,12 +2216,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 500 TopN Hash Memory Usage: 0.1 @@ -2436,17 +2436,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Offset of rows: 490 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 10 Data size: 6390 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 5410 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2529,12 +2529,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 510 TopN Hash Memory Usage: 0.1 @@ -2749,17 
+2749,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Offset of rows: 490 - Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 20 Data size: 12780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 10820 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat @@ -2852,12 +2852,12 @@ Select Operator expressions: key (type: string), substr(value, 5) (type: string), ds (type: string), hr (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string), _col3 (type: string) null sort order: zzzz sort order: ++++ - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE tag: -1 TopN: 1090 TopN Hash Memory Usage: 0.1 @@ -3072,17 +3072,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: string), KEY.reducesinkkey2 (type: string), KEY.reducesinkkey3 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 2000 Data size: 1278000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 1082000 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 600 Offset of rows: 490 - Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false GlobalTableId: 0 #### A masked pattern was here #### NumFilesPerFileSink: 1 - Statistics: Num rows: 600 Data size: 383400 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 600 Data size: 324600 Basic stats: COMPLETE Column stats: COMPLETE #### A masked pattern was here #### table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out index 94bf30c..4000b81 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out @@ -96,23 +96,23 @@ PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_34] (rows=76645658 width=314) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14 - Select Operator [SEL_33] (rows=76645658 width=650) + Select Operator [SEL_33] (rows=76645658 width=458) 
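Reviewer note (illustrative, not part of the patch): in the query19 plans the width changes compose additively. An operator's estimated row width is the sum of its output columns' average sizes, so the 96-byte reduction in a zip column's estimate appears once in operators carrying a single zip column and twice in the join that carries both ca_zip and s_zip. A check on the widths in these hunks:

public class Query19WidthCheck {
    public static void main(String[] args) {
        int zipDelta   = 188 - 92;  // one zip column's estimate shrinks by 96 bytes
        int oneZipJoin = 468 - 372; // MERGEJOIN_121 keeps a single zip column
        int twoZipJoin = 650 - 458; // MERGEJOIN_122 keeps both ca_zip and s_zip
        System.out.println(zipDelta == 96 && oneZipJoin == 96 && twoZipJoin == 2 * 96); // true
    }
}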
Output:["_col8","_col11","_col12","_col13","_col14"] - Filter Operator [FIL_32] (rows=76645658 width=650) + Filter Operator [FIL_32] (rows=76645658 width=458) predicate:(_col3 <> _col16) - Merge Join Operator [MERGEJOIN_122] (rows=76645658 width=650) + Merge Join Operator [MERGEJOIN_122] (rows=76645658 width=458) Conds:RS_29._col7=RS_143._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_142] (rows=1704 width=188) + Select Operator [SEL_142] (rows=1704 width=92) Output:["_col0","_col1"] TableScan [TS_21] (rows=1704 width=93) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_zip"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_121] (rows=76645658 width=468) + Merge Join Operator [MERGEJOIN_121] (rows=76645658 width=372) Conds:RS_26._col0=RS_27._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] @@ -165,7 +165,7 @@ <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=188) + Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=92) Conds:RS_125._col1=RS_127._col0(Inner),Output:["_col0","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_125] @@ -179,7 +179,7 @@ <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_127] PartitionCols:_col0 - Select Operator [SEL_126] (rows=40000000 width=188) + Select Operator [SEL_126] (rows=40000000 width=92) Output:["_col0","_col1"] TableScan [TS_3] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out index 2828fb0..2b94615 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out @@ -72,17 +72,17 @@ Stage-1 Reducer 3 vectorized File Output Operator [FS_125] - Limit [LIM_124] (rows=100 width=776) + Limit [LIM_124] (rows=100 width=592) Number of rows:100 - Select Operator [SEL_123] (rows=479121995 width=776) + Select Operator [SEL_123] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_32] - Select Operator [SEL_31] (rows=479121995 width=776) + Select Operator [SEL_31] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_56] (rows=479121995 width=685) + Top N Key Operator [TNK_56] (rows=479121995 width=593) keys:_col2, _col1, substr(_col5, 1, 30), _col7,top n:100 - Merge Join Operator [MERGEJOIN_100] (rows=479121995 width=685) + Merge Join Operator [MERGEJOIN_100] (rows=479121995 width=593) Conds:RS_102._col0=RS_122._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_102] @@ -94,7 +94,7 @@ <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_122] PartitionCols:_col1 - Select Operator [SEL_121] (rows=479121995 width=508) + Select Operator [SEL_121] (rows=479121995 width=416) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Group By Operator [GBY_120] (rows=479121995 width=328) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, 
KEY._col2, KEY._col3 diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out index ed1501b..06b9f6f 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -271,7 +271,7 @@ <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col2 - Select Operator [SEL_148] (rows=1704 width=276) + Select Operator [SEL_148] (rows=1704 width=178) Output:["_col0","_col1","_col2"] Filter Operator [FIL_147] (rows=1704 width=181) predicate:substr(s_zip, 1, 2) is not null @@ -280,24 +280,24 @@ <-Reducer 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_145] (rows=1 width=184) + Select Operator [SEL_145] (rows=1 width=86) Output:["_col0"] - Filter Operator [FIL_144] (rows=1 width=192) + Filter Operator [FIL_144] (rows=1 width=96) predicate:(_col1 = 2L) - Group By Operator [GBY_143] (rows=5633 width=192) + Group By Operator [GBY_143] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized Reduce Output Operator [RS_175] PartitionCols:_col0 - Group By Operator [GBY_174] (rows=5633 width=192) + Group By Operator [GBY_174] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_173] (rows=1126 width=192) + Group By Operator [GBY_173] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_172] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=1126 width=192) + Group By Operator [GBY_171] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_170] (rows=2253 width=97) Output:["_col0"] @@ -333,14 +333,14 @@ <-Reducer 9 [CONTAINS] vectorized Reduce Output Operator [RS_161] PartitionCols:_col0 - Group By Operator [GBY_160] (rows=5633 width=192) + Group By Operator [GBY_160] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_159] (rows=10141 width=192) + Group By Operator [GBY_159] (rows=10141 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_158] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=141974 width=192) + Group By Operator [GBY_157] (rows=141974 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_156] (rows=20000000 width=89) Output:["_col0"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out index 706bbd7..9a550b7 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out @@ -200,13 +200,13 @@ Stage-1 Reducer 6 vectorized File Output Operator [FS_209] - Limit [LIM_208] (rows=72 width=832) + Limit [LIM_208] (rows=72 width=656) Number of rows:100 - Select Operator [SEL_207] (rows=72 width=832) + Select Operator [SEL_207] (rows=72 width=656) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_206] - Select Operator [SEL_205] (rows=72 width=832) + Select Operator [SEL_205] (rows=72 width=656) Output:["_col4","_col5","_col6","_col7"] Top N Key Operator [TNK_204] 
(rows=72 width=353) keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), (_col3 / _col4), (_col5 / _col6),top n:100 diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out index 5310297..98249ac 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out @@ -96,24 +96,24 @@ Stage-1 Reducer 7 vectorized File Output Operator [FS_125] - Limit [LIM_124] (rows=100 width=590) + Limit [LIM_124] (rows=100 width=420) Number of rows:100 - Select Operator [SEL_123] (rows=3920468 width=590) + Select Operator [SEL_123] (rows=3920468 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=3920468 width=590) + Select Operator [SEL_121] (rows=3920468 width=420) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_120] (rows=3920468 width=406) + Group By Operator [GBY_120] (rows=3920468 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_25] (rows=7840936 width=406) + Group By Operator [GBY_25] (rows=7840936 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col11, _col15 - Top N Key Operator [TNK_56] (rows=15681873 width=386) + Top N Key Operator [TNK_56] (rows=15681873 width=301) keys:_col13, _col11, _col15,top n:100 - Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=386) + Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=301) Conds:RS_21._col1=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_119] @@ -125,12 +125,12 @@ <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=291) + Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=206) Conds:RS_18._col3=RS_117._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_116] (rows=27 width=188) + Select Operator [SEL_116] (rows=27 width=103) Output:["_col0","_col1"] TableScan [TS_8] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] diff --git ql/src/test/results/clientpositive/perf/tez/query19.q.out ql/src/test/results/clientpositive/perf/tez/query19.q.out index 55ce944..d94c899 100644 --- ql/src/test/results/clientpositive/perf/tez/query19.q.out +++ ql/src/test/results/clientpositive/perf/tez/query19.q.out @@ -96,16 +96,16 @@ PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_36] (rows=76645658 width=314) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14 - Select Operator [SEL_35] (rows=76645658 width=650) + Select Operator [SEL_35] (rows=76645658 width=458) Output:["_col8","_col11","_col12","_col13","_col14"] - Filter Operator [FIL_34] (rows=76645658 width=650) + 
Filter Operator [FIL_34] (rows=76645658 width=458) predicate:(_col3 <> _col16) - Merge Join Operator [MERGEJOIN_124] (rows=76645658 width=650) + Merge Join Operator [MERGEJOIN_124] (rows=76645658 width=458) Conds:RS_31._col7=RS_147._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_147] PartitionCols:_col0 - Select Operator [SEL_146] (rows=1704 width=188) + Select Operator [SEL_146] (rows=1704 width=92) Output:["_col0","_col1"] Filter Operator [FIL_145] (rows=1704 width=93) predicate:s_store_sk is not null @@ -114,7 +114,7 @@ <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_123] (rows=76645658 width=468) + Merge Join Operator [MERGEJOIN_123] (rows=76645658 width=372) Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] @@ -167,7 +167,7 @@ <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_120] (rows=80000000 width=188) + Merge Join Operator [MERGEJOIN_120] (rows=80000000 width=92) Conds:RS_127._col1=RS_130._col0(Inner),Output:["_col0","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_127] @@ -181,7 +181,7 @@ <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_129] (rows=40000000 width=188) + Select Operator [SEL_129] (rows=40000000 width=92) Output:["_col0","_col1"] Filter Operator [FIL_128] (rows=40000000 width=93) predicate:ca_address_sk is not null diff --git ql/src/test/results/clientpositive/perf/tez/query23.q.out ql/src/test/results/clientpositive/perf/tez/query23.q.out index e60051b..edde953 100644 --- ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -173,25 +173,25 @@ PartitionCols:_col0 Group By Operator [GBY_501] (rows=62562 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_500] (rows=183358851 width=290) + Select Operator [SEL_500] (rows=183358851 width=220) Output:["_col1"] - Filter Operator [FIL_499] (rows=183358851 width=290) + Filter Operator [FIL_499] (rows=183358851 width=220) predicate:(_col3 > 4L) - Select Operator [SEL_498] (rows=550076554 width=290) + Select Operator [SEL_498] (rows=550076554 width=220) Output:["_col1","_col3"] - Group By Operator [GBY_497] (rows=550076554 width=290) + Group By Operator [GBY_497] (rows=550076554 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=550076554 width=290) + Group By Operator [GBY_22] (rows=550076554 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col3, _col5 - Merge Join Operator [MERGEJOIN_442] (rows=550076554 width=282) + Merge Join Operator [MERGEJOIN_442] (rows=550076554 width=212) Conds:RS_18._col1=RS_496._col0(Inner),Output:["_col3","_col4","_col5"] <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_496] PartitionCols:_col0 - Select Operator [SEL_495] (rows=462000 width=188) + Select Operator [SEL_495] (rows=462000 width=118) Output:["_col0","_col1"] Filter Operator [FIL_494] (rows=462000 width=188) predicate:i_item_sk is not null diff --git ql/src/test/results/clientpositive/perf/tez/query79.q.out ql/src/test/results/clientpositive/perf/tez/query79.q.out index f7c8e2f..83b29ee 100644 --- 
ql/src/test/results/clientpositive/perf/tez/query79.q.out +++ ql/src/test/results/clientpositive/perf/tez/query79.q.out @@ -72,17 +72,17 @@ Stage-1 Reducer 3 vectorized File Output Operator [FS_127] - Limit [LIM_126] (rows=100 width=776) + Limit [LIM_126] (rows=100 width=592) Number of rows:100 - Select Operator [SEL_125] (rows=479121995 width=776) + Select Operator [SEL_125] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=479121995 width=776) + Select Operator [SEL_32] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_57] (rows=479121995 width=685) + Top N Key Operator [TNK_57] (rows=479121995 width=593) keys:_col2, _col1, substr(_col5, 1, 30), _col7,top n:100 - Merge Join Operator [MERGEJOIN_101] (rows=479121995 width=685) + Merge Join Operator [MERGEJOIN_101] (rows=479121995 width=593) Conds:RS_104._col0=RS_124._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_104] @@ -96,7 +96,7 @@ <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_124] PartitionCols:_col1 - Select Operator [SEL_123] (rows=479121995 width=508) + Select Operator [SEL_123] (rows=479121995 width=416) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Group By Operator [GBY_122] (rows=479121995 width=328) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 diff --git ql/src/test/results/clientpositive/perf/tez/query8.q.out ql/src/test/results/clientpositive/perf/tez/query8.q.out index c72498b..27420bd 100644 --- ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -271,7 +271,7 @@ <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col2 - Select Operator [SEL_148] (rows=1704 width=276) + Select Operator [SEL_148] (rows=1704 width=178) Output:["_col0","_col1","_col2"] Filter Operator [FIL_147] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) @@ -280,24 +280,24 @@ <-Reducer 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_145] (rows=1 width=184) + Select Operator [SEL_145] (rows=1 width=86) Output:["_col0"] - Filter Operator [FIL_144] (rows=1 width=192) + Filter Operator [FIL_144] (rows=1 width=96) predicate:(_col1 = 2L) - Group By Operator [GBY_143] (rows=5633 width=192) + Group By Operator [GBY_143] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized Reduce Output Operator [RS_175] PartitionCols:_col0 - Group By Operator [GBY_174] (rows=5633 width=192) + Group By Operator [GBY_174] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_173] (rows=1126 width=192) + Group By Operator [GBY_173] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_172] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=1126 width=192) + Group By Operator [GBY_171] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_170] (rows=2253 width=97) Output:["_col0"] @@ -333,14 +333,14 @@ <-Reducer 9 [CONTAINS] vectorized Reduce 
Output Operator [RS_161] PartitionCols:_col0 - Group By Operator [GBY_160] (rows=5633 width=192) + Group By Operator [GBY_160] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_159] (rows=10141 width=192) + Group By Operator [GBY_159] (rows=10141 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_158] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=141974 width=192) + Group By Operator [GBY_157] (rows=141974 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_156] (rows=20000000 width=89) Output:["_col0"] diff --git ql/src/test/results/clientpositive/perf/tez/query85.q.out ql/src/test/results/clientpositive/perf/tez/query85.q.out index 6e1a562..94ec2f9 100644 --- ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -201,13 +201,13 @@ Stage-1 Reducer 6 vectorized File Output Operator [FS_239] - Limit [LIM_238] (rows=72 width=832) + Limit [LIM_238] (rows=72 width=656) Number of rows:100 - Select Operator [SEL_237] (rows=72 width=832) + Select Operator [SEL_237] (rows=72 width=656) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_236] - Select Operator [SEL_235] (rows=72 width=832) + Select Operator [SEL_235] (rows=72 width=656) Output:["_col4","_col5","_col6","_col7"] Top N Key Operator [TNK_234] (rows=72 width=353) keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), (_col3 / _col4), (_col5 / _col6),top n:100 diff --git ql/src/test/results/clientpositive/perf/tez/query99.q.out ql/src/test/results/clientpositive/perf/tez/query99.q.out index d24d5cc..87e2713 100644 --- ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -96,24 +96,24 @@ Stage-1 Reducer 7 vectorized File Output Operator [FS_131] - Limit [LIM_130] (rows=100 width=590) + Limit [LIM_130] (rows=100 width=420) Number of rows:100 - Select Operator [SEL_129] (rows=3920468 width=590) + Select Operator [SEL_129] (rows=3920468 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_128] - Select Operator [SEL_127] (rows=3920468 width=590) + Select Operator [SEL_127] (rows=3920468 width=420) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_126] (rows=3920468 width=406) + Group By Operator [GBY_126] (rows=3920468 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_28] (rows=7840936 width=406) + Group By Operator [GBY_28] (rows=7840936 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col15, _col11 - Top N Key Operator [TNK_59] (rows=15681873 width=386) + Top N Key Operator [TNK_59] (rows=15681873 width=301) keys:_col13, _col15, _col11,top n:100 - Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=386) + Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=301) 
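Reviewer note (illustrative, not part of the patch): the query99 hunks show the same single-column effect propagating unchanged up the plan. The w_warehouse_name estimate drops by 85 bytes, and every downstream operator that keeps the column (the joins, Top N Key, Group By) shrinks by exactly 85. A check on the widths appearing in both query99 variants of this diff:

public class Query99WidthCheck {
    public static void main(String[] args) {
        // {old, new} operator widths copied from the query99 plans above
        int[][] widths = {
            {188, 103}, // Select over default@warehouse
            {291, 206}, // Merge Join with warehouse (constraints variant)
            {305, 220}, // Merge Join with warehouse (non-constraints variant)
            {386, 301}, // next Merge Join / Top N Key
            {406, 321}, // Group By
        };
        for (int[] w : widths) {
            System.out.println((w[0] - w[1]) == 85); // true for every operator
        }
    }
}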
Conds:RS_24._col2=RS_108._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_108] @@ -127,12 +127,12 @@ <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_104] (rows=282273729 width=305) + Merge Join Operator [MERGEJOIN_104] (rows=282273729 width=220) Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_124] (rows=27 width=188) + Select Operator [SEL_124] (rows=27 width=103) Output:["_col0","_col1"] Filter Operator [FIL_123] (rows=27 width=104) predicate:w_warehouse_sk is not null diff --git ql/src/test/results/clientpositive/spark/union17.q.out ql/src/test/results/clientpositive/spark/union17.q.out index c645207..91939f2 100644 --- ql/src/test/results/clientpositive/spark/union17.q.out +++ ql/src/test/results/clientpositive/spark/union17.q.out @@ -85,13 +85,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL Map 7 Map Operator Tree: TableScan @@ -107,13 +107,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL Reducer 3 Reduce Operator Tree: Group By Operator @@ -171,13 +171,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 1 Data size: 280 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 275 Basic stats: COMPLETE Column stats: PARTIAL Reducer 9 Reduce Operator Tree: Group By Operator @@ -195,13 +195,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string), _col2 (type: string) null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num 
rows: 1 Data size: 464 Basic stats: COMPLETE Column stats: PARTIAL + Statistics: Num rows: 1 Data size: 459 Basic stats: COMPLETE Column stats: PARTIAL Stage: Stage-0 Move Operator diff --git ql/src/test/results/clientpositive/union17.q.out ql/src/test/results/clientpositive/union17.q.out index 480befa..45b0862 100644 --- ql/src/test/results/clientpositive/union17.q.out +++ ql/src/test/results/clientpositive/union17.q.out @@ -95,20 +95,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -130,20 +130,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 70000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 68750 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT substr(_col1, 5)) keys: _col0 (type: string), _col1 (type: string), substr(_col1, 5) (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -245,7 +245,7 @@ null sort order: zzz sort order: +++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 501 Data size: 232464 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 501 Data size: 229959 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator diff --git ql/src/test/results/clientpositive/vector_case_when_1.q.out ql/src/test/results/clientpositive/vector_case_when_1.q.out index bedde47..3b359eb 100644 --- ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -213,13 +213,13 @@ native: true projectedOutputColumnNums: [4, 21, 26, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 58, 61, 64, 67] selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') 
WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 22:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 23:boolean, LongColLessLongScalar(col 4:int, val 10) -> 24:boolean, LongColLessLongScalar(col 4:int, val 100) -> 25:boolean) -> 26:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 27:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 28:boolean, LongColLessLongScalar(col 4:int, val 10) -> 29:boolean) -> 30:string, IfExprLongColumnLongColumn(col 31:boolean, col 32:date, col 33:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 31:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 32:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 33:date) -> 34:date, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 35:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 36:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 36:double) -> 37:double) -> 38:double, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 39:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 40:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 40:double) -> 41:double) -> 42:double, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), null, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 43:boolean) -> 44:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, null))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 45:boolean) -> 46:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 47:boolean) -> 48:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 49:boolean) -> 50:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 51:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 51:boolean) -> 52:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 53:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 53:boolean) -> 54:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 55:boolean, col 56:timestampcol 57:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 55:boolean, CastDateToTimestamp(col 12:date) -> 
56:timestamp, CastDateToTimestamp(col 11:date) -> 57:timestamp) -> 58:timestamp, VectorUDFAdaptor(if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 59:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 60:int) -> 61:int, VectorUDFAdaptor(if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 62:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 63:int) -> 64:int, IfExprLongScalarLongScalar(col 66:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 65:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 65:int) -> 66:boolean) -> 67:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -545,14 +545,14 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 24, 33, 40, 44, 48, 52, 54, 56, 58, 60, 62, 64, 68, 71, 74, 77]
selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val
Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 25:boolean, val Singlecol 32:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 25:boolean, IfExprStringScalarStringGroupColumn(col 26:boolean, val Twocol 31:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 26:boolean, IfExprStringScalarStringGroupColumn(col 27:boolean, val Somecol 30:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 27:boolean, IfExprColumnNull(col 28:boolean, col 29:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 28:boolean, ConstantVectorExpression(val Many) -> 29:string) -> 30:string) -> 31:string) -> 32:string) -> 33:string, IfExprStringScalarStringGroupColumn(col 34:boolean, val Singlecol 39:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 34:boolean, IfExprStringScalarStringGroupColumn(col 35:boolean, val Twocol 38:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 35:boolean, IfExprColumnNull(col 36:boolean, col 37:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 36:boolean, ConstantVectorExpression(val Some) -> 37:string) -> 38:string) -> 39:string) -> 40:string, IfExprLongColumnLongColumn(col 41:boolean, col 42:date, col 43:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 41:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 42:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 43:date) -> 44:date, IfExprDoubleColumnDoubleScalar(col 45:boolean, col 47:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 45:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 46:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 46:double) -> 47:double) -> 48:double, IfExprDoubleColumnDoubleScalar(col 49:boolean, col 51:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 49:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 50:double) -> 51:double) -> 52:double, IfExprNullColumn(col 53:boolean, null, col 7)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 53:boolean, col 7:decimal(10,2)/DECIMAL_64) -> 54:decimal(10,2)/DECIMAL_64, IfExprColumnNull(col 55:boolean, col 7:decimal(10,2)/DECIMAL_64, null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 55:boolean, col 7:decimal(10,2)/DECIMAL_64) -> 56:decimal(10,2)/DECIMAL_64, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 57:boolean) -> 58:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 59:boolean) -> 60:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 61:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 61:boolean) -> 62:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 63:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 63:boolean) -> 64:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 
65:boolean, col 66:timestampcol 67:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 65:boolean, CastDateToTimestamp(col 12:date) -> 66:timestamp, CastDateToTimestamp(col 11:date) -> 67:timestamp) -> 68:timestamp, IfExprColumnNull(col 69:boolean, col 70:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 69:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 70:int) -> 71:int, IfExprNullColumn(col 72:boolean, null, col 73)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 72:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 73:int) -> 74:int, IfExprLongScalarLongScalar(col 76:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 75:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 75:int) -> 76:boolean) -> 77:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -878,14 +878,14 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [4, 27, 39, 48, 52, 57, 62, 64, 66, 71, 76, 78, 80, 84, 87, 90, 93]
44:boolean, ConstantVectorExpression(val Some) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprCondExprColumn(col 53:boolean, col 55:double, col 56:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 53:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 55:double, ConstantVectorExpression(val 0.0) -> 56:double) -> 57:double, IfExprCondExprColumn(col 58:boolean, col 60:double, col 61:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 58:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 59:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 59:double) -> 60:double, ConstantVectorExpression(val 0.0) -> 61:double) -> 62:double, IfExprNullColumn(col 63:boolean, null, col 94)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 63:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 94:decimal(10,2)) -> 64:decimal(10,2), IfExprColumnNull(col 65:boolean, col 95:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 65:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 95:decimal(10,2)) -> 66:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 70:boolean) -> 71:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 75:boolean) -> 76:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 77:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 77:boolean) -> 78:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 79:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 79:boolean) -> 80:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 81:boolean, col 82:timestampcol 83:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 81:boolean, CastDateToTimestamp(col 12:date) -> 82:timestamp, CastDateToTimestamp(col 11:date) -> 83:timestamp) -> 84:timestamp, IfExprCondExprNull(col 85:boolean, col 86:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 85:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 86:int) -> 87:int, IfExprNullCondExpr(col 88:boolean, null, col 89:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 88:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 89:int) -> 90:int, IfExprLongScalarLongScalar(col 92:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 91:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 91:int) -> 92:boolean) -> 93:date + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE +======= selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: 
LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 38:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 28:boolean, ConstantVectorExpression(val Single) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 30:boolean, ConstantVectorExpression(val Two) -> 31:string, IfExprColumnCondExpr(col 32:boolean, col 33:stringcol 36:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 32:boolean, ConstantVectorExpression(val Some) -> 33:string, IfExprColumnNull(col 34:boolean, col 35:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 34:boolean, ConstantVectorExpression(val Many) -> 35:string) -> 36:string) -> 37:string) -> 38:string) -> 39:string, IfExprColumnCondExpr(col 40:boolean, col 41:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 40:boolean, ConstantVectorExpression(val Single) -> 41:string, IfExprColumnCondExpr(col 42:boolean, col 43:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 42:boolean, ConstantVectorExpression(val Two) -> 43:string, IfExprColumnNull(col 44:boolean, col 45:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 44:boolean, ConstantVectorExpression(val Some) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprCondExprColumn(col 53:boolean, col 55:double, col 56:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 53:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 55:double, ConstantVectorExpression(val 0.0) -> 56:double) -> 57:double, IfExprCondExprColumn(col 58:boolean, col 60:double, col 61:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 58:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 59:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 59:double) -> 60:double, ConstantVectorExpression(val 0.0) -> 61:double) -> 62:double, IfExprNullColumn(col 63:boolean, null, col 7)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 63:boolean, col 7:decimal(10,2)/DECIMAL_64) -> 64:decimal(10,2)/DECIMAL_64, IfExprColumnNull(col 65:boolean, col 7:decimal(10,2)/DECIMAL_64, null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 65:boolean, col 7:decimal(10,2)/DECIMAL_64) -> 66:decimal(10,2)/DECIMAL_64, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: 
StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 70:boolean) -> 71:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 75:boolean) -> 76:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 77:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 77:boolean) -> 78:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 79:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 79:boolean) -> 80:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 81:boolean, col 82:timestampcol 83:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 81:boolean, CastDateToTimestamp(col 12:date) -> 82:timestamp, CastDateToTimestamp(col 11:date) -> 83:timestamp) -> 84:timestamp, IfExprCondExprNull(col 85:boolean, col 86:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 85:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 86:int) -> 87:int, IfExprNullCondExpr(col 88:boolean, null, col 89:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 88:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 89:int) -> 90:int, IfExprLongScalarLongScalar(col 92:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 91:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 91:int) -> 92:boolean) -> 93:date
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vector_case_when_2.q.out ql/src/test/results/clientpositive/vector_case_when_2.q.out
index 288e6f0..a7b46fd 100644
--- ql/src/test/results/clientpositive/vector_case_when_2.q.out
+++ ql/src/test/results/clientpositive/vector_case_when_2.q.out
@@ -145,7 +145,7 @@
native: true
projectedOutputColumnNums: [1, 3, 9, 14, 18, 22, 24, 27, 32, 38, 2]
selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN
((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 10:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 11:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 12:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 13:boolean) -> 14:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 16:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 17:boolean) -> 18:string, IfExprLongColumnLongColumn(col 19:boolean, col 20:int, col 21:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 19:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 20:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 21:int) -> 22:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE ('2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 23:boolean) -> 24:string, VectorUDFAdaptor(if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)))(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 25:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 26:int) -> 27:int, VectorUDFAdaptor(if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null))(children: ColAndCol(col 28:boolean, col 29:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 28:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 29:boolean) -> 30:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 31:int) -> 32:int, IfExprLongColumnLongColumn(col 35:boolean, col 36:date, col 37:date)(children: DoubleColGreaterDoubleScalar(col 34:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 33:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 33:double) -> 34:double) -> 35:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 36:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 37:date) -> 38:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -155,7 +155,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN 
IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized Map Vectorization: @@ -181,10 +181,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,7 +409,7 @@ native: true projectedOutputColumnNums: [1, 3, 12, 21, 28, 32, 34, 37, 42, 48, 2] selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val Oldcol 20:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 13:boolean, IfExprStringScalarStringGroupColumn(col 14:boolean, val Early 2000scol 19:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 14:boolean, IfExprStringScalarStringGroupColumn(col 15:boolean, val Late 2000scol 18:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 15:boolean, IfExprColumnNull(col 16:boolean, col 17:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Early 2010s) -> 17:string) -> 18:string) -> 19:string) -> 20:string) -> 21:string, IfExprStringScalarStringGroupColumn(col 22:boolean, val Oldcol 27:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 22:boolean, 
IfExprStringScalarStringGroupColumn(col 23:boolean, val Early 2000scol 26:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 23:boolean, IfExprColumnNull(col 24:boolean, col 25:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 24:boolean, ConstantVectorExpression(val Late 2000s) -> 25:string) -> 26:string) -> 27:string) -> 28:string, IfExprLongColumnLongColumn(col 29:boolean, col 30:int, col 31:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 29:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 30:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 31:int) -> 32:int, IfExprStringGroupColumnStringScalar(col 33:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 33:boolean) -> 34:string, IfExprNullColumn(col 35:boolean, null, col 36)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 35:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 36:int) -> 37:int, IfExprColumnNull(col 40:boolean, col 41:int, null)(children: ColAndCol(col 38:boolean, col 39:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 38:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 39:boolean) -> 40:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 41:int) -> 42:int, IfExprLongColumnLongColumn(col 45:boolean, col 46:date, col 47:date)(children: DoubleColGreaterDoubleScalar(col 44:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 43:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 43:double) -> 44:double) -> 45:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 46:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 47:date) -> 48:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -419,7 +419,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized Map Vectorization: @@ -445,10 +445,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -673,7 +673,7 @@ native: true projectedOutputColumnNums: [1, 3, 15, 27, 36, 40, 42, 45, 50, 56, 2] selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 16:boolean, col 17:stringcol 26:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Old) -> 17:string, IfExprColumnCondExpr(col 18:boolean, col 19:stringcol 25:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 18:boolean, ConstantVectorExpression(val Early 2000s) -> 19:string, IfExprColumnCondExpr(col 20:boolean, col 21:stringcol 24:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 20:boolean, ConstantVectorExpression(val Late 2000s) -> 21:string, IfExprColumnNull(col 22:boolean, col 23:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 22:boolean, ConstantVectorExpression(val Early 2010s) -> 23:string) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 28:boolean, ConstantVectorExpression(val Old) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 30:boolean, ConstantVectorExpression(val Early 2000s) -> 31:string, IfExprColumnNull(col 32:boolean, col 33:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 32:boolean, ConstantVectorExpression(val Late 2000s) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, IfExprStringGroupColumnStringScalar(col 
41:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 43:boolean, null, col 44:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 43:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 44:int) -> 45:int, IfExprCondExprNull(col 48:boolean, col 49:int, null)(children: ColAndCol(col 46:boolean, col 47:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 46:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 47:boolean) -> 48:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 49:int) -> 50:int, IfExprCondExprCondExpr(col 53:boolean, col 54:datecol 55:date)(children: DoubleColGreaterDoubleScalar(col 52:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 51:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 51:double) -> 52:double) -> 53:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 54:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 55:date) -> 56:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -683,7 +683,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized Map Vectorization: @@ -709,10 +709,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out index 925b96c..8ea182a 100644 --- ql/src/test/results/clientpositive/vector_groupby4.q.out +++ ql/src/test/results/clientpositive/vector_groupby4.q.out @@ -90,7 +90,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -114,7 +114,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map Vectorization: enabled: true @@ -134,10 +134,10 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat @@ -146,7 +146,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -187,7 +187,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Execution mode: vectorized Map Vectorization: diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out index b478656..2cba267 100644 --- ql/src/test/results/clientpositive/vector_groupby6.q.out +++ ql/src/test/results/clientpositive/vector_groupby6.q.out @@ -90,7 +90,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -114,7 +114,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Map Vectorization: enabled: true @@ -134,10 +134,10 @@ keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 307 Data 
size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -146,7 +146,7 @@
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -187,7 +187,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Map Vectorization:
diff --git ql/src/test/results/clientpositive/vector_if_expr.q.out ql/src/test/results/clientpositive/vector_if_expr.q.out
index 58c2e1e..292e8c6 100644
--- ql/src/test/results/clientpositive/vector_if_expr.q.out
+++ ql/src/test/results/clientpositive/vector_if_expr.q.out
@@ -41,7 +41,7 @@
native: true
projectedOutputColumnNums: [10, 13]
selectExpressions: IfExprStringScalarStringScalar(col 10:boolean, val first, val second) -> 13:string
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
null sort order: z
@@ -51,7 +51,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: vectorized
Map Vectorization:
@@ -71,10 +71,10 @@
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
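The vector_if_expr.q.out delta above is the clearest illustration of the new estimation path: the projected column is if(cond, 'first', 'second'), and its estimated Data size drops from 857712 to 426534 once column statistics are derived from the branches instead of the default worst-case string width. A minimal sketch of such an estimator follows; it assumes the IStatEstimator interface introduced by this patch (only estimate(List<ColStatistics>) returning Optional<ColStatistics> is visible at the StatsUtils call site), and the combination rules shown here (widest branch, summed NDV and null counts) are illustrative rather than a copy of what the patch ships:

import java.util.List;
import java.util.Optional;

import org.apache.hadoop.hive.ql.plan.ColStatistics;
import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator;

// Hypothetical estimator for if(cond, a, b): the result is never wider than
// its widest value branch and holds at most the distinct values of both
// branches combined.
public class IfStatEstimator implements IStatEstimator {

  @Override
  public Optional<ColStatistics> estimate(List<ColStatistics> csList) {
    // children arrive as [condition, thenBranch, elseBranch]
    if (csList.size() != 3 || csList.get(1) == null || csList.get(2) == null) {
      // a child without statistics: decline, so the caller falls back to getNDVFor()
      return Optional.empty();
    }
    ColStatistics thenCs = csList.get(1);
    ColStatistics elseCs = csList.get(2);
    ColStatistics result = new ColStatistics();
    result.setAvgColLen(Math.max(thenCs.getAvgColLen(), elseCs.getAvgColLen()));
    result.setCountDistint(thenCs.getCountDistint() + elseCs.getCountDistint());
    result.setNumNulls(thenCs.getNumNulls() + elseCs.getNumNulls());
    return Optional.of(result);
  }
}

Returning Optional.empty() keeps the feature conservative: whenever any input statistic is unavailable, planning behaves exactly as it did before this patch.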
diff --git ql/src/test/results/clientpositive/vector_nvl.q.out ql/src/test/results/clientpositive/vector_nvl.q.out
index 43ca0ec..26bae3f 100644
--- ql/src/test/results/clientpositive/vector_nvl.q.out
+++ ql/src/test/results/clientpositive/vector_nvl.q.out
@@ -140,19 +140,19 @@
native: true
projectedOutputColumnNums: [4, 14]
selectExpressions: VectorCoalesce(columns [4, 13])(children: col 4:float, ConstantVectorExpression(val 1.0) -> 13:float) -> 14:float
- Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vectorization_multi_value.q.out ql/src/test/results/clientpositive/vectorization_multi_value.q.out
index 2fec50d..b4507fb 100644
--- ql/src/test/results/clientpositive/vectorization_multi_value.q.out
+++ ql/src/test/results/clientpositive/vectorization_multi_value.q.out
@@ -64,13 +64,13 @@
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':'b')) -> 3:map) -> 4:map
- Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 326 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 326 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -143,13 +143,13 @@
native: true
projectedOutputColumnNums: [5]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 4:map>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':'c')))(children: VectorUDFAdaptor(map('b':'c')) -> 3:map) -> 4:map>) -> 5:map>
- Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 241 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 241 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,13 +222,13 @@
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':a)) -> 3:map) -> 4:map
- Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column
stats: COMPLETE + Statistics: Num rows: 3 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -301,13 +301,13 @@ native: true projectedOutputColumnNums: [5] selectExpressions: IfExprCondExprNull(col 2:boolean, col 4:map>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':a)))(children: VectorUDFAdaptor(map('b':a)) -> 3:map) -> 4:map>) -> 5:map> - Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -380,13 +380,13 @@ native: true projectedOutputColumnNums: [4] selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a','b')) -> 3:array) -> 4:array - Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -459,13 +459,13 @@ native: true projectedOutputColumnNums: [6] selectExpressions: IfExprCondExprNull(col 2:boolean, col 5:array>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array(array('a','b'),array('c','d')))(children: VectorUDFAdaptor(array('a','b')) -> 3:array, VectorUDFAdaptor(array('c','d')) -> 4:array) -> 5:array>) -> 6:array> - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -538,13 +538,13 @@ native: true projectedOutputColumnNums: [4] selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a',a)) -> 3:array) -> 4:array - Statistics: Num rows: 3 Data size: 5760 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -617,13 +617,13 @@ native: true projectedOutputColumnNums: [6] selectExpressions: IfExprCondExprNull(col 2:boolean, col 5:array>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array(array('a',a),array('b','c')))(children: VectorUDFAdaptor(array('a',a)) -> 3:array, VectorUDFAdaptor(array('b','c')) -> 4:array) -> 5:array>) -> 6:array> - Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 19280 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3 Data size: 19280 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/vectorized_string_funcs.q.out ql/src/test/results/clientpositive/vectorized_string_funcs.q.out index 75da191..2c0504a 100644 --- ql/src/test/results/clientpositive/vectorized_string_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_string_funcs.q.out @@ -68,10 +68,10 @@ Select Operator expressions: substr(cstring1, 1, 2) (type: string), substr(cstring1, 2) (type: string), lower(cstring1) (type: string), upper(cstring1) (type: string), upper(cstring1) (type: string), length(cstring1) (type: int), trim(cstring1) (type: string), ltrim(cstring1) (type: string), rtrim(cstring1) (type: string), concat(cstring1, cstring2) (type: string), concat('>', cstring1) (type: string), concat(cstring1, '<') (type: string), concat(substr(cstring1, 1, 2), substr(cstring2, 1, 2)) (type: string) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1024 Data size: 2265088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1024 Data size: 2024426 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 907edb6..c94eb90 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -268,7 +268,7 @@ native: true projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 
5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) null sort order: z @@ -278,7 +278,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized Map Vectorization: @@ -298,10 +298,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: boolean), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
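All of the q.out churn in this patch is mechanical: the plans are regenerated, and Data size shrinks wherever a UDF can now supply its own column statistics. What the test deltas do not show is how a UDF opts in, so a sketch is included here for orientation. It is hypothetical: the discovery call GenericUDF.adapt(IStatEstimatorProvider.class) matches the usage in StatsUtils, but GenericUDFExample is an invented stand-in that reuses the IfStatEstimator sketch from above; which concrete UDFs implement the provider in this patch is not visible in these hunks:

import java.util.Optional;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator;
import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

// Hypothetical UDF opting into statistics estimation: StatsUtils discovers the
// provider via adapt(IStatEstimatorProvider.class), so implementing the
// interface and handing out an estimator is all a UDF has to do.
public class GenericUDFExample extends GenericUDF implements IStatEstimatorProvider {

  @Override
  public Optional<IStatEstimator> getStatEstimator() {
    // reuse the IfStatEstimator sketched earlier; any IStatEstimator works here
    return Optional.of(new IfStatEstimator());
  }

  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
  }

  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    return arguments[0].get(); // placeholder pass-through evaluation
  }

  @Override
  public String getDisplayString(String[] children) {
    return getStandardDisplayString("example", children);
  }
}

Because both the provider lookup and the estimate itself return Optional, a UDF without an estimator, or an estimator faced with missing statistics, degrades to the pre-patch behavior; the whole path can also be switched off via the new hive.stats.use.statestimators flag.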