diff --git common/src/java/org/apache/hadoop/hive/conf/HiveConf.java common/src/java/org/apache/hadoop/hive/conf/HiveConf.java index 2f695d4..83c2eb0 100644 --- common/src/java/org/apache/hadoop/hive/conf/HiveConf.java +++ common/src/java/org/apache/hadoop/hive/conf/HiveConf.java @@ -2498,6 +2498,9 @@ "higher compute cost. (NDV means the number of distinct values.). It only affects the FM-Sketch \n" + "(not the HLL algorithm which is the default), where it computes the number of necessary\n" + " bitvectors to achieve the accuracy."), + HIVE_STATS_USE_UDF_ESTIMATORS("hive.stats.use.statestimators", true, + "Statestimators are able to provide more accurate column statistic infos for UDF results."), + /** * @deprecated Use MetastoreConf.STATS_NDV_TUNER */ diff --git contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out index 15dd4c0..509b016 100644 --- contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out +++ contrib/src/test/results/clientpositive/udaf_example_group_concat.q.out @@ -39,13 +39,13 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 526000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 501250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 526000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 501250 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: array) Reduce Operator Tree: Group By Operator @@ -53,10 +53,10 @@ keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 250 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 67250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index cb2d0a7..2673580 100644 --- ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -27,6 +27,7 @@ import java.util.List; import java.util.Map; import java.util.Map.Entry; +import java.util.Optional; import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; @@ -73,6 +74,9 @@ import org.apache.hadoop.hive.ql.plan.Statistics; import org.apache.hadoop.hive.ql.plan.Statistics.State; import org.apache.hadoop.hive.ql.stats.BasicStats.Factory; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.udf.generic.GenericUDAFSum; import org.apache.hadoop.hive.ql.udf.generic.GenericUDF; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFBridge; import org.apache.hadoop.hive.ql.udf.generic.NDV; @@ -81,6 +85,7 @@ import 
org.apache.hadoop.hive.serde2.objectinspector.ConstantObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector.Category; +import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector.PrimitiveCategory; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantListObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantMapObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StandardConstantStructObjectInspector; @@ -1528,18 +1533,7 @@ return null; } } else if (end instanceof ExprNodeConstantDesc) { - - // constant projection - ExprNodeConstantDesc encd = (ExprNodeConstantDesc) end; - - colName = encd.getName(); - colType = encd.getTypeString(); - if (encd.getValue() == null) { - // null projection - numNulls = numRows; - } else { - countDistincts = 1; - } + return buildColStatForConstant(conf, numRows, (ExprNodeConstantDesc) end); } else if (end instanceof ExprNodeGenericFuncDesc) { ExprNodeGenericFuncDesc engfd = (ExprNodeGenericFuncDesc) end; colName = engfd.getName(); @@ -1560,6 +1554,27 @@ } } + if (conf.getBoolVar(ConfVars.HIVE_STATS_USE_UDF_ESTIMATORS)) { + Optional sep = engfd.getGenericUDF().adapt(IStatEstimatorProvider.class); + if (sep.isPresent()) { + Optional se = sep.get().getStatEstimator(); + if (se.isPresent()) { + List csList = new ArrayList(); + for (ExprNodeDesc child : engfd.getChildren()) { + ColStatistics cs = getColStatisticsFromExpression(conf, parentStats, child); + csList.add(cs); + } + Optional res = se.get().estimate(csList); + if (res.isPresent()) { + ColStatistics newStats = res.get(); + colType = colType.toLowerCase(); + newStats.setColumnType(colType); + newStats.setColumnName(colName); + return newStats; + } + } + } + } // fallback to default countDistincts = getNDVFor(engfd, numRows, parentStats); } else if (end instanceof ExprNodeColumnListDesc) { @@ -1590,6 +1605,43 @@ return colStats; } + private static ColStatistics buildColStatForConstant(HiveConf conf, long numRows, ExprNodeConstantDesc encd) { + + long numNulls = 0; + long countDistincts = 0; + if (encd.getValue() == null) { + // null projection + numNulls = numRows; + } else { + countDistincts = 1; + } + String colType = encd.getTypeString(); + colType = colType.toLowerCase(); + ObjectInspector oi = encd.getWritableObjectInspector(); + double avgColSize = getAvgColLenOf(conf, oi, colType); + ColStatistics colStats = new ColStatistics(encd.getName(), colType); + colStats.setAvgColLen(avgColSize); + colStats.setCountDistint(countDistincts); + colStats.setNumNulls(numNulls); + + Optional value = getLongConstValue(encd); + if (value.isPresent()) { + colStats.setRange(value.get(), value.get()); + } + return colStats; + } + + private static Optional getLongConstValue(ExprNodeConstantDesc encd) { + if (encd.getValue() != null) { + String constant = encd.getValue().toString(); + PrimitiveCategory category = GenericUDAFSum.getReturnType(encd.getTypeInfo()); + if (category == PrimitiveCategory.LONG) { + return Optional.of(Long.parseLong(constant)); + } + } + return Optional.empty(); + } + private static boolean isWideningCast(ExprNodeGenericFuncDesc engfd) { GenericUDF udf = engfd.getGenericUDF(); if (!FunctionRegistry.isOpCast(udf)) { diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimator.java ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimator.java new file mode 100644 index 0000000..a72ecd2 --- 
/dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimator.java @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.List; +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +/** + * Enables statistics related computation on UDFs + */ +public interface IStatEstimator { + + /** + * Computes the output statistics of the actual UDF. + * + * @param argStats the statistics for every argument of the UDF. + */ + public Optional estimate(List argStats); +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimatorProvider.java ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimatorProvider.java new file mode 100644 index 0000000..be9a934 --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/IStatEstimatorProvider.java @@ -0,0 +1,31 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.Optional; + +/** + * Marker interface for UDFs to communicate that the usage of StatEstimators is supported by the UDF. + */ +public interface IStatEstimatorProvider { + + /** + * Returns the stat estimator for the given UDF instance. 
+ */ + public Optional getStatEstimator(); +} diff --git ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimators.java ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimators.java new file mode 100644 index 0000000..05d894a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/stats/estimator/StatEstimators.java @@ -0,0 +1,51 @@ +package org.apache.hadoop.hive.ql.stats.estimator; + +import java.util.Optional; + +import org.apache.hadoop.hive.ql.plan.ColStatistics; + +public class StatEstimators { + + public static class WorstStatCombiner { + + private boolean inited; + private ColStatistics result; + + public void add(ColStatistics stat) { + if (!inited) { + inited = true; + result = stat.clone(); + result.setRange(null); + result.setIsEstimated(true); + return; + } else { + + if (stat.getAvgColLen() > result.getAvgColLen()) { + result.setAvgColLen(stat.getAvgColLen()); + } + if (stat.getCountDistint() > result.getCountDistint()) { + result.setCountDistint(stat.getCountDistint()); + } + if (stat.getNumNulls() > result.getNumNulls()) { + result.setNumNulls(stat.getNumNulls()); + } + if (stat.getNumTrues() > result.getNumTrues()) { + result.setNumTrues(stat.getNumTrues()); + } + if (stat.getNumFalses() > result.getNumFalses()) { + result.setNumFalses(stat.getNumFalses()); + } + if (stat.isFilteredColumn()) { + result.setFilterColumn(); + } + + } + + } + public Optional getResult() { + return Optional.of(result); + + } + } + +} diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java index 5b1964c..e20b009 100755 --- ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/UDFSubstr.java @@ -19,12 +19,18 @@ package org.apache.hadoop.hive.ql.udf; import java.util.Arrays; +import java.util.List; +import java.util.Optional; import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDF; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStart; import org.apache.hadoop.hive.ql.exec.vector.expressions.StringSubstrColStartLen; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.plan.ColStatistics.Range; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; import org.apache.hadoop.io.BytesWritable; import org.apache.hadoop.io.IntWritable; import org.apache.hadoop.io.Text; @@ -48,7 +54,7 @@ + " > SELECT _FUNC_('Facebook', 5, 1) FROM src LIMIT 1;\n" + " 'b'") @VectorizedExpressions({StringSubstrColStart.class, StringSubstrColStartLen.class}) -public class UDFSubstr extends UDF { +public class UDFSubstr extends UDF implements IStatEstimatorProvider { private final int[] index; private final Text r; @@ -131,4 +137,52 @@ public BytesWritable evaluate(BytesWritable bw, IntWritable pos){ return evaluate(bw, pos, maxValue); } + + @Override + public Optional getStatEstimator() { + return Optional.of(new SubStrStatEstimator()); + } + + private static class SubStrStatEstimator implements IStatEstimator { + + @Override + public Optional estimate(List csList) { + ColStatistics cs = csList.get(0).clone(); + + // this might bad in a skewed case; consider: + // 1 row with 1000 long string + // 99 rows with 0 length + // orig avg is 10 + // new avg is 5 (if substr(5)) ; but in reality it will stay ~10 + Optional start = 
getRangeWidth(csList.get(1).getRange()); + Range startRange = csList.get(1).getRange(); + if (startRange != null && startRange.minValue != null) { + double newAvgColLen = cs.getAvgColLen() - startRange.minValue.doubleValue(); + if (newAvgColLen > 0) { + cs.setAvgColLen(newAvgColLen); + } + + } + + if (csList.size() >= 2) { + Range lengthRange = csList.get(2).getRange(); + if (lengthRange != null && lengthRange.maxValue != null) { + Double w = lengthRange.maxValue.doubleValue(); + if (cs.getAvgColLen() > w) { + cs.setAvgColLen(w); + } + } + } + + return Optional.of(cs); + } + + private Optional getRangeWidth(Range range) { + if (range.minValue != null && range.maxValue != null) { + return Optional.of(range.maxValue.doubleValue() - range.minValue.doubleValue()); + } + return Optional.empty(); + } + + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java index 6597f4b..c1bf325 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDF.java @@ -20,6 +20,7 @@ import java.io.Closeable; import java.io.IOException; +import java.util.Optional; import org.apache.hadoop.hive.common.classification.InterfaceAudience; import org.apache.hadoop.hive.common.classification.InterfaceStability; @@ -638,4 +639,12 @@ return i + ORDINAL_SUFFIXES[i % 10]; } } + + @SuppressWarnings("unchecked") + public Optional adapt(Class clazz) { + if (clazz.isInstance(this)) { + return Optional.of((T) this); + } + return Optional.empty(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java index 7a644fc..377ab37 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFBridge.java @@ -21,6 +21,7 @@ import java.io.Serializable; import java.lang.reflect.Method; import java.util.ArrayList; +import java.util.Optional; import org.apache.hadoop.hive.common.type.HiveDecimal; import org.apache.hadoop.hive.ql.exec.FunctionRegistry; @@ -28,6 +29,8 @@ import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.Utilities; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; import org.apache.hadoop.hive.ql.udf.generic.GenericUDFUtils.ConversionHelper; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; @@ -44,7 +47,7 @@ * UDF class needs to be serialized with the plan. 
* */ -public class GenericUDFBridge extends GenericUDF implements Serializable { +public class GenericUDFBridge extends GenericUDF implements Serializable, IStatEstimatorProvider { private static final long serialVersionUID = 4994861742809511113L; /** @@ -97,7 +100,7 @@ this.isOperator = isOperator; this.udfClassName = udfClassName; } - + // For Java serialization only public GenericUDFBridge() { } @@ -151,7 +154,7 @@ public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException { try { - udf = (UDF)getUdfClassInternal().newInstance(); + udf = getUdfClassInternal().newInstance(); } catch (Exception e) { throw new UDFArgumentException( "Unable to instantiate UDF implementation class " + udfClassName + ": " + e); @@ -249,4 +252,22 @@ public interface UdfWhitelistChecker { boolean isUdfAllowed(Class clazz); } + + @SuppressWarnings("unchecked") + @Override + public Optional adapt(Class clazz) { + if (clazz.isInstance(udf)) { + return Optional.of((T) udf); + } + return super.adapt(clazz); + } + + @Override + public Optional getStatEstimator() { + if (IStatEstimatorProvider.class.isInstance(udf)) { + IStatEstimatorProvider sep = (IStatEstimatorProvider) udf; + return sep.getStatEstimator(); + } + return Optional.empty(); + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java index 06e9d00..a5fec3b 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCase.java @@ -18,16 +18,24 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.List; +import java.util.Optional; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators.WorstStatCombiner; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; /** * GenericUDF Class for SQL construct "CASE a WHEN b THEN c [ELSE f] END". - * + * * NOTES: 1. a and b should be compatible, or an exception will be * thrown. 2. c and f should be compatible types, or an exception will be * thrown. 
@@ -49,7 +57,7 @@ + " END\n" + " FROM emp_details") -public class GenericUDFCase extends GenericUDF { +public class GenericUDFCase extends GenericUDF implements IStatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; private transient GenericUDFUtils.ReturnObjectInspectorResolver caseOIResolver; @@ -138,4 +146,23 @@ return sb.toString(); } + @Override + public Optional getStatEstimator() { + return Optional.of(new CaseStatEstimator()); + } + + static class CaseStatEstimator implements IStatEstimator { + + @Override + public Optional estimate(List argStats) { + WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner(); + for (int i = 1; i < argStats.size(); i += 2) { + combiner.add(argStats.get(i)); + } + combiner.add(argStats.get(argStats.size() - 1)); + return combiner.getResult(); + } + } + + } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java index 8ebe9e0..26b895c 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFCoalesce.java @@ -18,9 +18,17 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.List; +import java.util.Optional; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators.WorstStatCombiner; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; /** @@ -33,7 +41,7 @@ value = "_FUNC_(a1, a2, ...) 
- Returns the first non-null argument", extended = "Example:\n" + " > SELECT _FUNC_(NULL, 1, NULL) FROM src LIMIT 1;\n" + " 1") -public class GenericUDFCoalesce extends GenericUDF { +public class GenericUDFCoalesce extends GenericUDF implements IStatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; @@ -72,4 +80,20 @@ return getStandardDisplayString("COALESCE", children, ","); } + @Override + public Optional getStatEstimator() { + return Optional.of(new CoalesceStatEstimator()); + } + + static class CoalesceStatEstimator implements IStatEstimator { + + @Override + public Optional estimate(List argStats) { + WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner(); + for (int i = 0; i < argStats.size(); i++) { + combiner.add(argStats.get(i)); + } + return combiner.getResult(); + } + } } diff --git ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java index 23708dc..aa5faab 100644 --- ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java +++ ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFIf.java @@ -18,6 +18,9 @@ package org.apache.hadoop.hive.ql.udf.generic; +import java.util.List; +import java.util.Optional; + import org.apache.hadoop.hive.ql.exec.Description; import org.apache.hadoop.hive.ql.exec.UDFArgumentException; import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException; @@ -25,6 +28,11 @@ import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressions; import org.apache.hadoop.hive.ql.exec.vector.VectorizedExpressionsSupportDecimal64; import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.ColStatistics; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimator; +import org.apache.hadoop.hive.ql.stats.estimator.IStatEstimatorProvider; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators; +import org.apache.hadoop.hive.ql.stats.estimator.StatEstimators.WorstStatCombiner; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -115,7 +123,7 @@ IfExprTimestampScalarColumn.class, IfExprTimestampScalarScalar.class, }) @VectorizedExpressionsSupportDecimal64() -public class GenericUDFIf extends GenericUDF { +public class GenericUDFIf extends GenericUDF implements IStatEstimatorProvider { private transient ObjectInspector[] argumentOIs; private transient GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver; @@ -171,4 +179,22 @@ assert (children.length == 3); return getStandardDisplayString("if", children); } + + @Override + public Optional getStatEstimator() { + return Optional.of(new IfStatEstimator()); + } + + static class IfStatEstimator implements IStatEstimator { + + @Override + public Optional estimate(List argStats) { + WorstStatCombiner combiner = new StatEstimators.WorstStatCombiner(); + combiner.add(argStats.get(1)); + combiner.add(argStats.get(2)); + return combiner.getResult(); + } + + } + } diff --git ql/src/test/queries/clientpositive/udf_coalesce.q ql/src/test/queries/clientpositive/udf_coalesce.q index 7d87580..6c6594a 100644 --- ql/src/test/queries/clientpositive/udf_coalesce.q +++ ql/src/test/queries/clientpositive/udf_coalesce.q @@ -1,6 +1,7 @@ --! qt:dataset:src_thrift --! 
qt:dataset:src -set hive.fetch.task.conversion=more; +set hive.cbo.enable=false; +set hive.fetch.task.conversion=none; DESCRIBE FUNCTION coalesce; DESCRIBE FUNCTION EXTENDED coalesce; @@ -47,6 +48,10 @@ FROM src tablesample (1 rows); EXPLAIN +SELECT COALESCE(key,'x') from src limit 1; + + +EXPLAIN SELECT COALESCE(src_thrift.lint[1], 999), COALESCE(src_thrift.lintstring[0].mystring, '999'), COALESCE(src_thrift.mstringstring['key_2'], '999') diff --git ql/src/test/queries/clientpositive/w1.q ql/src/test/queries/clientpositive/w1.q new file mode 100644 index 0000000..4f259a7 --- /dev/null +++ ql/src/test/queries/clientpositive/w1.q @@ -0,0 +1,14 @@ +set hive.fetch.task.conversion=none; + +create table t (a string); + +insert into t values +('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), +('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'), +('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'); + +explain analyze +select a from t; + +explain analyze +select substr(a,1,4) from t; diff --git ql/src/test/results/clientpositive/constprog_when_case.q.out ql/src/test/results/clientpositive/constprog_when_case.q.out index f75fa1c..8d3dd98 100644 --- ql/src/test/results/clientpositive/constprog_when_case.q.out +++ ql/src/test/results/clientpositive/constprog_when_case.q.out @@ -52,10 +52,10 @@ Select Operator expressions: if((bool0 is true or (null and bool0 is not true and bool0 is not false)), key0, if((((not bool0) is true and bool0 is not true) or (null and bool0 is not true and bool0 is not false)), key1, key2)) (type: string) outputColumnNames: _col0 - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1000 Data size: 184000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 91000 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git ql/src/test/results/clientpositive/groupby4.q.out ql/src/test/results/clientpositive/groupby4.q.out index 7528034..d861e47 100644 --- ql/src/test/results/clientpositive/groupby4.q.out +++ ql/src/test/results/clientpositive/groupby4.q.out @@ -49,7 +49,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -66,17 +66,17 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -85,7 +85,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -119,7 +119,7 @@ null sort order: sort order: Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby4_noskew.q.out ql/src/test/results/clientpositive/groupby4_noskew.q.out index 6550017..842bda3 100644 --- ql/src/test/results/clientpositive/groupby4_noskew.q.out +++ ql/src/test/results/clientpositive/groupby4_noskew.q.out @@ -47,10 +47,10 @@ keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -59,7 +59,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -92,7 +92,7 @@ Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby6.q.out ql/src/test/results/clientpositive/groupby6.q.out index be673c5..a72afb2 100644 --- ql/src/test/results/clientpositive/groupby6.q.out +++ ql/src/test/results/clientpositive/groupby6.q.out @@ -49,7 +49,7 @@ keys: KEY._col0 (type: string) mode: partial1 outputColumnNames: _col0 - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -66,17 +66,17 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 307 Data size: 56488 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -85,7 +85,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -119,7 +119,7 @@ null sort order: sort order: Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby6_map.q.out ql/src/test/results/clientpositive/groupby6_map.q.out index ed812ce..9f8c791 100644 --- ql/src/test/results/clientpositive/groupby6_map.q.out +++ ql/src/test/results/clientpositive/groupby6_map.q.out @@ -40,23 +40,23 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: mergepartial outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -65,7 +65,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(c1, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/groupby6_map_skew.q.out ql/src/test/results/clientpositive/groupby6_map_skew.q.out index 57f8c3f..43fbc84 100644 --- ql/src/test/results/clientpositive/groupby6_map_skew.q.out +++ ql/src/test/results/clientpositive/groupby6_map_skew.q.out @@ -41,20 +41,20 @@ minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string) null sort order: z sort order: + Map-reduce partition columns: rand() (type: double) - Statistics: Num rows: 250 
Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: partials outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -71,17 +71,17 @@ null sort order: z sort order: + Map-reduce partition columns: _col0 (type: string) - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized Reduce Operator Tree: Group By Operator keys: KEY._col0 (type: string) mode: final outputColumnNames: _col0 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -90,7 +90,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 250 Data size: 46000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 250 Data size: 21250 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(c1, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/groupby6_noskew.q.out ql/src/test/results/clientpositive/groupby6_noskew.q.out index 689775e..9545ca3 100644 --- ql/src/test/results/clientpositive/groupby6_noskew.q.out +++ ql/src/test/results/clientpositive/groupby6_noskew.q.out @@ -47,10 +47,10 @@ keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0 - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -59,7 +59,7 @@ Select Operator expressions: _col0 (type: string) outputColumnNames: c1 - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -92,7 +92,7 @@ Reduce Output Operator null sort order: sort order: - Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE value expressions: c1 (type: string) Execution mode: vectorized Reduce Operator Tree: diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out index 8fa21f2..756c179 
100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer.q.out @@ -94,14 +94,14 @@ keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -109,20 +109,20 @@ name: default.dest_g4 Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) - Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -130,20 +130,20 @@ name: default.dest_g2 Filter Operator predicate: (KEY._col0 < 5) (type: boolean) - Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), 
UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -349,14 +349,14 @@ keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 68256 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 36972 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 316 Data size: 120080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 88796 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -364,20 +364,20 @@ name: default.dest_g4 Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) - Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -385,20 +385,20 @@ name: default.dest_g2 Filter Operator predicate: (KEY._col0 < 5) (type: boolean) - Statistics: Num rows: 166 Data size: 45650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 29216 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT 
KEY._col1:0._col0), sum(KEY._col1:0._col0), sum(DISTINCT KEY._col1:1._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 22680 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 12285 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), concat(_col0, _col2) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 105 Data size: 39900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 29505 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -467,11 +467,11 @@ keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 97000 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: bigint), concat(_col0, _col3) (type: string), _col3 (type: double), _col4 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -480,20 +480,20 @@ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) - Statistics: Num rows: 166 Data size: 76194 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 43326 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT KEY._col2:0._col0), sum(KEY._col2:0._col0), count(VALUE._col0) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 166 Data size: 65072 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 32204 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col2) (type: int), concat(_col0, _col3) (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 166 Data size: 63080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46646 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 166 Data size: 63080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 166 Data size: 46646 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -508,7 +508,7 @@ key expressions: _col0 (type: string), _col1 (type: bigint) null sort order: zz sort order: ++ - Statistics: 
Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: string), _col3 (type: double), _col4 (type: bigint) Execution mode: vectorized @@ -516,17 +516,17 @@ Select Operator expressions: KEY.reducesinkkey0 (type: string), KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: string), VALUE._col1 (type: double), VALUE._col2 (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 500 Data size: 196000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 500 Data size: 146500 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 - Statistics: Num rows: 10 Data size: 3920 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), _col2 (type: string), UDFToInteger(_col3) (type: int), UDFToInteger(_col4) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 10 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2810 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 10 Data size: 3800 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2810 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat diff --git ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out index e4bc26e..d151470 100644 --- ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out +++ ql/src/test/results/clientpositive/groupby_multi_single_reducer2.q.out @@ -63,20 +63,20 @@ Statistics: Num rows: 332 Data size: 59096 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (KEY._col0 >= 5) (type: boolean) - Statistics: Num rows: 110 Data size: 30250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1 - Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int) outputColumnNames: _col0, _col1 - Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -85,7 +85,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int) outputColumnNames: key, c1 - Statistics: Num rows: 105 Data size: 19740 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 105 Data size: 9345 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll') minReductionHashAggr: 0.99 @@ -100,20 +100,20 @@ serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Filter Operator predicate: (KEY._col0 < 5) (type: boolean) - Statistics: Num rows: 110 Data size: 30250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 110 Data size: 19360 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(DISTINCT KEY._col1:0._col0), count(VALUE._col0) keys: KEY._col0 (type: string) mode: complete outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 105 Data size: 21000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 10605 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), UDFToInteger(_col1) (type: int), UDFToInteger(_col2) (type: int) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.TextInputFormat output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat @@ -122,7 +122,7 @@ Select Operator expressions: _col0 (type: string), _col1 (type: int), _col2 (type: int) outputColumnNames: key, c1, c2 - Statistics: Num rows: 105 Data size: 20160 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 105 Data size: 9765 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(c1, 'hll'), compute_stats(c2, 'hll') minReductionHashAggr: 0.99 diff --git ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out index ed969fa..9b869a1 100644 --- ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out +++ ql/src/test/results/clientpositive/infer_bucket_sort_dyn_part.q.out @@ -419,15 +419,14 @@ POSTHOOK: Input: default@srcpart@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-4 depends on stages: Stage-0, Stage-2 - Stage-3 depends on stages: Stage-1 - Stage-9 depends on stages: Stage-3 , consists of Stage-6, Stage-5, Stage-7 - Stage-6 - Stage-0 depends on stages: Stage-6, Stage-5, Stage-8 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 + Stage-4 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0, Stage-8 + Stage-3 Stage-5 - Stage-7 - Stage-8 depends on stages: Stage-7 + Stage-6 depends on stages: Stage-5 + Stage-8 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 @@ -466,100 +465,37 @@ Select Operator expressions: _col0 (type: string), CAST( _col1 AS STRING) (type: string), if(((UDFToDouble(_col0) % 100.0D) = 0.0D), '11', '12') (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 112812 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 316 
Data size: 112812 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.test_table_n8 Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 316 Data size: 173484 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 316 Data size: 142516 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col0 (type: string), _col1 (type: string) - null sort order: zz - sort order: ++ - Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col2 (type: struct), _col3 (type: struct) - Execution mode: vectorized - Reduce Operator Tree: - Group By Operator - aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) - keys: KEY._col0 (type: string), KEY._col1 (type: string) - mode: mergepartial - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE - Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string) - outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE - File Output Operator - compressed: false - Statistics: Num rows: 158 Data size: 182964 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - - Stage: Stage-4 - Stats Work - Basic Stats Work: - Column Stats Desc: - Columns: key, value - Column Types: string, string - Table: default.test_table_n8 - - Stage: Stage-3 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Execution mode: vectorized - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: 
-          outputColumnNames: _col0, _col1, _col2
-          File Output Operator
-            compressed: false
-            Dp Sort State: PARTITION_SORTED
-            Statistics: Num rows: 316 Data size: 143780 Basic stats: COMPLETE Column stats: COMPLETE
-            table:
-                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                name: default.test_table_n8
-
-  Stage: Stage-9
+  Stage: Stage-7
    Conditional Operator
-  Stage: Stage-6
+  Stage: Stage-4
    Move Operator
      files:
          hdfs directory: true
@@ -578,6 +514,21 @@
                serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                name: default.test_table_n8
+  Stage: Stage-2
+    Stats Work
+      Basic Stats Work:
+      Column Stats Desc:
+          Columns: key, value
+          Column Types: string, string
+          Table: default.test_table_n8
+
+  Stage: Stage-3
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
   Stage: Stage-5
     Merge File Operator
       Map Operator Tree:
@@ -585,19 +536,43 @@
          merge level: block
          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-  Stage: Stage-7
-    Merge File Operator
-      Map Operator Tree:
-          RCFile Merge Operator
-          merge level: block
-          input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-
-  Stage: Stage-8
+  Stage: Stage-6
    Move Operator
      files:
          hdfs directory: true
#### A masked pattern was here ####
+  Stage: Stage-8
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string), _col1 (type: string)
+              null sort order: zz
+              sort order: ++
+              Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+              value expressions: _col2 (type: struct), _col3 (type: struct)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
+          keys: KEY._col0 (type: string), KEY._col1 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3
+          Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+          Select Operator
+            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
+            outputColumnNames: _col0, _col1, _col2, _col3
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
 PREHOOK: query: INSERT OVERWRITE TABLE test_table_n8 PARTITION (ds = '2008-04-08', hr)
 SELECT key, value, IF (key % 100 == 0, '11', '12') FROM
 (SELECT key, COUNT(*) AS value FROM srcpart
diff --git ql/src/test/results/clientpositive/list_bucket_dml_6.q.out ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
index f88f5bf..3ec1d32 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_6.q.out
@@ -49,14 +49,14 @@
         Select Operator
           expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            GlobalTableId: 1
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            Static Partition Specification: ds=2008-04-08/
-           Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@
         Select Operator
           expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
           outputColumnNames: key, value, ds, hr
-          Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
             keys: ds (type: string), hr (type: string)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               tag: -1
               value expressions: _col2 (type: struct), _col3 (type: struct)
               auto parallelism: false
@@ -212,17 +212,17 @@
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-         Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+         Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-           Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
-             Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+             Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -430,14 +430,14 @@
         Select Operator
           expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            GlobalTableId: 1
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            Static Partition Specification: ds=2008-04-08/
-           Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -465,20 +465,20 @@
         Select Operator
           expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
           outputColumnNames: key, value, ds, hr
-          Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
             keys: ds (type: string), hr (type: string)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               tag: -1
               value expressions: _col2 (type: struct), _col3 (type: struct)
               auto parallelism: false
@@ -593,17 +593,17 @@
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-         Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+         Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-           Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
-             Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+             Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/list_bucket_dml_7.q.out ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
index 43804d6..c41ae71 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_7.q.out
@@ -49,14 +49,14 @@
         Select Operator
           expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            GlobalTableId: 1
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            Static Partition Specification: ds=2008-04-08/
-           Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@
         Select Operator
           expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
           outputColumnNames: key, value, ds, hr
-          Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
             keys: ds (type: string), hr (type: string)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               tag: -1
               value expressions: _col2 (type: struct), _col3 (type: struct)
               auto parallelism: false
@@ -212,17 +212,17 @@
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-         Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+         Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-           Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
-             Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+             Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
@@ -430,14 +430,14 @@
         Select Operator
           expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            GlobalTableId: 1
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            Static Partition Specification: ds=2008-04-08/
-           Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -465,20 +465,20 @@
         Select Operator
           expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
           outputColumnNames: key, value, ds, hr
-          Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
             keys: ds (type: string), hr (type: string)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               tag: -1
               value expressions: _col2 (type: struct), _col3 (type: struct)
               auto parallelism: false
@@ -593,17 +593,17 @@
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-         Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+         Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-           Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
-             Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+             Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/list_bucket_dml_8.q.out ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
index 8551f57..2b82c86 100644
--- ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
+++ ql/src/test/results/clientpositive/list_bucket_dml_8.q.out
@@ -49,14 +49,14 @@
         Select Operator
           expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string)
           outputColumnNames: _col0, _col1, _col2
-          Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
          File Output Operator
            compressed: false
            GlobalTableId: 1
#### A masked pattern was here ####
            NumFilesPerFileSink: 1
            Static Partition Specification: ds=2008-04-08/
-           Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
            table:
                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
@@ -84,20 +84,20 @@
         Select Operator
           expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
           outputColumnNames: key, value, ds, hr
-          Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
           Group By Operator
             aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
             keys: ds (type: string), hr (type: string)
             minReductionHashAggr: 0.99
             mode: hash
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
             Reduce Output Operator
               key expressions: _col0 (type: string), _col1 (type: string)
               null sort order: zz
               sort order: ++
               Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-              Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               tag: -1
               value expressions: _col2 (type: struct), _col3 (type: struct)
               auto parallelism: false
@@ -212,17 +212,17 @@
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
          mode: mergepartial
          outputColumnNames: _col0, _col1, _col2, _col3
-         Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+         Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
          Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
            outputColumnNames: _col0, _col1, _col2, _col3
-           Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+           Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
            File Output Operator
              compressed: false
              GlobalTableId: 0
#### A masked pattern was here ####
              NumFilesPerFileSink: 1
-             Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+             Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
#### A masked pattern was here ####
              table:
                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
diff --git ql/src/test/results/clientpositive/llap/udf_coalesce.q.out ql/src/test/results/clientpositive/llap/udf_coalesce.q.out
index f35ec7a..a934fdc 100644
--- ql/src/test/results/clientpositive/llap/udf_coalesce.q.out
+++ ql/src/test/results/clientpositive/llap/udf_coalesce.q.out
@@ -61,20 +61,39 @@
 POSTHOOK: Input: default@src
#### A masked pattern was here ####
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Row Limit Per Split: 1
+                  Statistics: Num rows: 500 Data size: 5312 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: 1 (type: int), 1 (type: int), 2 (type: int), 1 (type: int), 3 (type: int), 4 (type: int), '1' (type: string), '1' (type: string), '2' (type: string), '1' (type: string), '3' (type: string), '4' (type: string), 1 (type: decimal(1,0)), 1 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), null (type: int)
+                    outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
+                    Statistics: Num rows: 500 Data size: 547004 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 500 Data size: 547004 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias: src
-          Row Limit Per Split: 1
-          Select Operator
-            expressions: 1 (type: int), 1 (type: int), 2 (type: int), 1 (type: int), 3 (type: int), 4 (type: int), '1' (type: string), '1' (type: string), '2' (type: string), '1' (type: string), '3' (type: string), '4' (type: string), 1 (type: decimal(1,0)), 1 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), 2 (type: decimal(1,0)), null (type: int)
-            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17
-            ListSink
+        ListSink
 PREHOOK: query: SELECT COALESCE(1),
                COALESCE(1, 2),
@@ -122,6 +141,53 @@
#### A masked pattern was here ####
 1	1	2	1	3	4	1	1	2	1	3	4	1	1	2	2	2	NULL
 PREHOOK: query: EXPLAIN
+SELECT COALESCE(key,'x') from src limit 1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: EXPLAIN
+SELECT COALESCE(key,'x') from src limit 1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src
+                  Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: COALESCE(key,'x') (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 500 Data size: 43500 Basic stats: COMPLETE Column stats: COMPLETE
+                    Limit
+                      Number of rows: 1
+                      Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+                      File Output Operator
+                        compressed: false
+                        Statistics: Num rows: 1 Data size: 87 Basic stats: COMPLETE Column stats: COMPLETE
+                        table:
+                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: EXPLAIN
 SELECT COALESCE(src_thrift.lint[1], 999),
        COALESCE(src_thrift.lintstring[0].mystring, '999'),
        COALESCE(src_thrift.mstringstring['key_2'], '999')
@@ -138,19 +204,38 @@
 POSTHOOK: Input: default@src_thrift
#### A masked pattern was here ####
 STAGE DEPENDENCIES:
-  Stage-0 is a root stage
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
 STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: src_thrift
+                  Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE
+                  Select Operator
+                    expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string)
+                    outputColumnNames: _col0, _col1, _col2
+                    Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 11 Data size: 39600 Basic stats: COMPLETE Column stats: NONE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+
   Stage: Stage-0
     Fetch Operator
       limit: -1
       Processor Tree:
-        TableScan
-          alias:
src_thrift - Select Operator - expressions: COALESCE(lint[1],999) (type: int), COALESCE(lintstring[0].mystring,'999') (type: string), COALESCE(mstringstring['key_2'],'999') (type: string) - outputColumnNames: _col0, _col1, _col2 - ListSink + ListSink PREHOOK: query: SELECT COALESCE(src_thrift.lint[1], 999), COALESCE(src_thrift.lintstring[0].mystring, '999'), diff --git ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out index db7140a..6d58dd3 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_1.q.out @@ -216,13 +216,13 @@ native: true projectedOutputColumnNums: [4, 21, 26, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 58, 61, 64, 67] selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 22:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 23:boolean, LongColLessLongScalar(col 4:int, val 10) -> 24:boolean, LongColLessLongScalar(col 4:int, val 100) -> 25:boolean) -> 26:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 27:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 28:boolean, LongColLessLongScalar(col 4:int, val 10) -> 29:boolean) -> 30:string, IfExprLongColumnLongColumn(col 31:boolean, col 32:date, col 33:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 31:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 32:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 33:date) -> 34:date, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 35:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 36:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 36:double) -> 37:double) -> 38:double, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 39:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 40:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 40:double) -> 41:double) -> 42:double, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), null, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 43:boolean) -> 44:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, null))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 45:boolean) -> 46:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: 
StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 47:boolean) -> 48:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 49:boolean) -> 50:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 51:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 51:boolean) -> 52:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 53:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 53:boolean) -> 54:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 55:boolean, col 56:timestampcol 57:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 55:boolean, CastDateToTimestamp(col 12:date) -> 56:timestamp, CastDateToTimestamp(col 11:date) -> 57:timestamp) -> 58:timestamp, VectorUDFAdaptor(if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 59:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 60:int) -> 61:int, VectorUDFAdaptor(if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 62:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 63:int) -> 64:int, IfExprLongScalarLongScalar(col 66:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 65:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 65:int) -> 66:boolean) -> 67:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -553,13 +553,13 @@ native: true projectedOutputColumnNums: [4, 24, 33, 40, 44, 48, 52, 54, 56, 58, 60, 62, 64, 68, 71, 74, 77] selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 25:boolean, val Singlecol 32:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 25:boolean, IfExprStringScalarStringGroupColumn(col 26:boolean, val Twocol 31:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 26:boolean, IfExprStringScalarStringGroupColumn(col 27:boolean, val Somecol 30:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 27:boolean, 
IfExprColumnNull(col 28:boolean, col 29:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 28:boolean, ConstantVectorExpression(val Many) -> 29:string) -> 30:string) -> 31:string) -> 32:string) -> 33:string, IfExprStringScalarStringGroupColumn(col 34:boolean, val Singlecol 39:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 34:boolean, IfExprStringScalarStringGroupColumn(col 35:boolean, val Twocol 38:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 35:boolean, IfExprColumnNull(col 36:boolean, col 37:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 36:boolean, ConstantVectorExpression(val Some) -> 37:string) -> 38:string) -> 39:string) -> 40:string, IfExprLongColumnLongColumn(col 41:boolean, col 42:date, col 43:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 41:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 42:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 43:date) -> 44:date, IfExprDoubleColumnDoubleScalar(col 45:boolean, col 47:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 45:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 46:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 46:double) -> 47:double) -> 48:double, IfExprDoubleColumnDoubleScalar(col 49:boolean, col 51:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 49:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 50:double) -> 51:double) -> 52:double, IfExprNullColumn(col 53:boolean, null, col 78)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 53:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 78:decimal(10,2)) -> 54:decimal(10,2), IfExprColumnNull(col 55:boolean, col 79:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 55:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 79:decimal(10,2)) -> 56:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 57:boolean) -> 58:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 59:boolean) -> 60:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 61:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 61:boolean) -> 62:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 63:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 63:boolean) -> 64:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 65:boolean, col 66:timestampcol 67:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 65:boolean, CastDateToTimestamp(col 12:date) -> 66:timestamp, CastDateToTimestamp(col 11:date) -> 67:timestamp) -> 68:timestamp, IfExprColumnNull(col 69:boolean, col 70:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 69:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 70:int) -> 71:int, IfExprNullColumn(col 72:boolean, null, col 73)(children: 
LongColGreaterLongScalar(col 2:int, val 10000) -> 72:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 73:int) -> 74:int, IfExprLongScalarLongScalar(col 76:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 75:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 75:int) -> 76:boolean) -> 77:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -890,13 +890,13 @@ native: true projectedOutputColumnNums: [4, 27, 39, 48, 52, 57, 62, 64, 66, 71, 76, 78, 80, 84, 87, 90, 93] selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 38:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 28:boolean, ConstantVectorExpression(val Single) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 30:boolean, ConstantVectorExpression(val Two) -> 31:string, IfExprColumnCondExpr(col 32:boolean, col 33:stringcol 36:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 32:boolean, ConstantVectorExpression(val Some) -> 33:string, IfExprColumnNull(col 34:boolean, col 35:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 34:boolean, ConstantVectorExpression(val Many) -> 35:string) -> 36:string) -> 37:string) -> 38:string) -> 39:string, IfExprColumnCondExpr(col 40:boolean, col 41:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 40:boolean, ConstantVectorExpression(val Single) -> 41:string, IfExprColumnCondExpr(col 42:boolean, col 43:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 42:boolean, ConstantVectorExpression(val Two) -> 43:string, IfExprColumnNull(col 44:boolean, col 45:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 44:boolean, ConstantVectorExpression(val Some) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprCondExprColumn(col 53:boolean, col 55:double, col 56:double)(children: 
StringGroupColEqualCharScalar(col 8:char(1), val N) -> 53:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 55:double, ConstantVectorExpression(val 0.0) -> 56:double) -> 57:double, IfExprCondExprColumn(col 58:boolean, col 60:double, col 61:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 58:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 59:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 59:double) -> 60:double, ConstantVectorExpression(val 0.0) -> 61:double) -> 62:double, IfExprNullColumn(col 63:boolean, null, col 94)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 63:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 94:decimal(10,2)) -> 64:decimal(10,2), IfExprColumnNull(col 65:boolean, col 95:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 65:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 95:decimal(10,2)) -> 66:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 70:boolean) -> 71:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 75:boolean) -> 76:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 77:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 77:boolean) -> 78:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 79:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 79:boolean) -> 80:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 81:boolean, col 82:timestampcol 83:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 81:boolean, CastDateToTimestamp(col 12:date) -> 82:timestamp, CastDateToTimestamp(col 11:date) -> 83:timestamp) -> 84:timestamp, IfExprCondExprNull(col 85:boolean, col 86:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 85:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 86:int) -> 87:int, IfExprNullCondExpr(col 88:boolean, null, col 89:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 88:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 89:int) -> 90:int, IfExprLongScalarLongScalar(col 92:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 91:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 91:int) -> 92:boolean) -> 93:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out index bcc7dd2..cc72f45 100644 --- ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/llap/vector_case_when_2.q.out @@ -151,7 +151,7 @@ native: true projectedOutputColumnNums: [1, 3, 9, 14, 18, 22, 24, 27, 32, 38, 2] selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 10:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 11:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 12:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 13:boolean) -> 14:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 16:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 17:boolean) -> 18:string, IfExprLongColumnLongColumn(col 19:boolean, col 20:int, col 21:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 19:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 20:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 21:int) -> 22:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE ('2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 23:boolean) -> 24:string, VectorUDFAdaptor(if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)))(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 25:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 26:int) -> 27:int, 
VectorUDFAdaptor(if(((ctimestamp2 >= TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null))(children: ColAndCol(col 28:boolean, col 29:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 28:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 29:boolean) -> 30:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 31:int) -> 32:int, IfExprLongColumnLongColumn(col 35:boolean, col 36:date, col 37:date)(children: DoubleColGreaterDoubleScalar(col 34:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 33:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 33:double) -> 34:double) -> 35:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 36:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 37:date) -> 38:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -162,7 +162,7 @@ native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 9:string, 14:string, 18:string, 22:int, 24:string, 27:int, 32:int, 38:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized, llap LLAP IO: all inputs @@ -204,13 +204,13 @@ className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10] - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -441,7 +441,7 @@ native: true projectedOutputColumnNums: [1, 3, 12, 21, 28, 32, 34, 37, 42, 48, 2] selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: 
TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val Oldcol 20:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 13:boolean, IfExprStringScalarStringGroupColumn(col 14:boolean, val Early 2000scol 19:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 14:boolean, IfExprStringScalarStringGroupColumn(col 15:boolean, val Late 2000scol 18:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 15:boolean, IfExprColumnNull(col 16:boolean, col 17:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Early 2010s) -> 17:string) -> 18:string) -> 19:string) -> 20:string) -> 21:string, IfExprStringScalarStringGroupColumn(col 22:boolean, val Oldcol 27:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 22:boolean, IfExprStringScalarStringGroupColumn(col 23:boolean, val Early 2000scol 26:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 23:boolean, IfExprColumnNull(col 24:boolean, col 25:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 24:boolean, ConstantVectorExpression(val Late 2000s) -> 25:string) -> 26:string) -> 27:string) -> 28:string, IfExprLongColumnLongColumn(col 29:boolean, col 30:int, col 31:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 29:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 30:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 31:int) -> 32:int, IfExprStringGroupColumnStringScalar(col 33:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 33:boolean) -> 34:string, IfExprNullColumn(col 35:boolean, null, col 36)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 35:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 36:int) -> 37:int, IfExprColumnNull(col 40:boolean, col 41:int, null)(children: ColAndCol(col 38:boolean, col 39:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 38:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 39:boolean) -> 40:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 41:int) -> 42:int, IfExprLongColumnLongColumn(col 45:boolean, col 46:date, col 47:date)(children: DoubleColGreaterDoubleScalar(col 44:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 43:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 43:double) -> 44:double) -> 45:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 46:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 47:date) -> 48:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -452,7 +452,7 @@ native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 12:string, 21:string, 28:string, 32:int, 34:string, 37:int, 42:int, 48:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -494,13 +494,13 @@
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -731,7 +731,7 @@
native: true
projectedOutputColumnNums: [1, 3, 15, 27, 36, 40, 42, 45, 50, 56, 2]
selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 16:boolean, col 17:stringcol 26:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Old) -> 17:string, IfExprColumnCondExpr(col 18:boolean, col 19:stringcol 25:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 18:boolean, ConstantVectorExpression(val Early 2000s) -> 19:string, IfExprColumnCondExpr(col 20:boolean, col 21:stringcol 24:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 20:boolean, ConstantVectorExpression(val Late 2000s) -> 21:string, IfExprColumnNull(col 22:boolean, col 23:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 22:boolean, ConstantVectorExpression(val Early 2010s) -> 23:string) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 28:boolean, ConstantVectorExpression(val Old) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 30:boolean, ConstantVectorExpression(val Early 2000s) -> 31:string, IfExprColumnNull(col 32:boolean, col 33:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 32:boolean, ConstantVectorExpression(val Late 2000s) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, IfExprStringGroupColumnStringScalar(col 41:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 43:boolean, null, col 44:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 43:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 44:int) -> 45:int, IfExprCondExprNull(col 48:boolean, col 49:int, null)(children: ColAndCol(col 46:boolean, col 47:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 46:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 47:boolean) -> 48:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 49:int) -> 50:int, IfExprCondExprCondExpr(col 53:boolean, col 54:datecol 55:date)(children: DoubleColGreaterDoubleScalar(col 52:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 51:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 51:double) -> 52:double) -> 53:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 54:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 55:date) -> 56:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -742,7 +742,7 @@
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
valueColumns: 15:string, 27:string, 36:string, 40:int, 42:string, 45:int, 50:int, 56:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -784,13 +784,13 @@
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 2, 3, 4, 5, 6, 7, 8, 9, 10]
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_groupby4.q.out ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
index e31fe56..f8d6e2d 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby4.q.out
@@ -108,7 +108,7 @@
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
@@ -118,7 +118,7 @@
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -139,13 +139,13 @@
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -158,7 +158,7 @@
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order: 
sort order: 
@@ -167,7 +167,7 @@
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Reducer 4
Execution mode: llap
diff --git ql/src/test/results/clientpositive/llap/vector_groupby6.q.out ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
index 25b6be6..2042f6d 100644
--- ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
+++ ql/src/test/results/clientpositive/llap/vector_groupby6.q.out
@@ -108,7 +108,7 @@
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: string)
null sort order: z
@@ -118,7 +118,7 @@
className: VectorReduceSinkStringOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Reducer 3
Execution mode: vectorized, llap
Reduce Vectorization:
@@ -139,13 +139,13 @@
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -158,7 +158,7 @@
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0]
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
null sort order: 
sort order: 
@@ -167,7 +167,7 @@
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Reducer 4
Execution mode: llap
diff --git ql/src/test/results/clientpositive/llap/vector_if_expr.q.out ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
index 5d955bc..ed73482 100644
--- ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
+++ ql/src/test/results/clientpositive/llap/vector_if_expr.q.out
@@ -47,7 +47,7 @@
native: true
projectedOutputColumnNums: [10, 13]
selectExpressions: IfExprStringScalarStringScalar(col 10:boolean, val first, val second) -> 13:string
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
null sort order: z
@@ -56,7 +56,7 @@
className: VectorReduceSinkObjectHashOperator
native: true
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: vectorized, llap
LLAP IO: all inputs
@@ -85,13 +85,13 @@
className: VectorSelectOperator
native: true
projectedOutputColumnNums: [0, 1]
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_nvl.q.out ql/src/test/results/clientpositive/llap/vector_nvl.q.out
index 420520e..5346ed4 100644
--- ql/src/test/results/clientpositive/llap/vector_nvl.q.out
+++ ql/src/test/results/clientpositive/llap/vector_nvl.q.out
@@ -147,19 +147,19 @@
native: true
projectedOutputColumnNums: [4, 14]
selectExpressions: VectorCoalesce(columns [4, 13])(children: col 4:float, ConstantVectorExpression(val 1.0) -> 13:float) -> 14:float
- Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/vector_udf1.q.out ql/src/test/results/clientpositive/llap/vector_udf1.q.out
index 956ff47..8ca8a8e 100644
--- ql/src/test/results/clientpositive/llap/vector_udf1.q.out
+++ ql/src/test/results/clientpositive/llap/vector_udf1.q.out
@@ -1 +1 @@
-PREHOOK: query: drop table varchar_udf_1_n2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table varchar_udf_1_n2 POSTHOOK: type: DROPTABLE PREHOOK: query: create table varchar_udf_1_n2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20), d1 string, d2 string, d3 varchar(10), d4 varchar(10)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: query: create table varchar_udf_1_n2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20), d1 string, d2 string, d3 varchar(10), d4 varchar(10)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_udf_1_n2 PREHOOK: query: insert overwrite table
varchar_udf_1_n2 select key, value, key, value, '2015-01-14', '2015-01-14', '2017-01-11', '2017-01-11' from src where key = '238' limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: query: insert overwrite table varchar_udf_1_n2 select key, value, key, value, '2015-01-14', '2015-01-14', '2017-01-11', '2017-01-11' from src where key = '238' limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: Lineage: varchar_udf_1_n2.c1 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1_n2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1_n2.c3 EXPRESSION [] POSTHOOK: Lineage: varchar_udf_1_n2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1_n2.d1 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1_n2.d2 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1_n2.d3 EXPRESSION [] POSTHOOK: Lineage: varchar_udf_1_n2.d4 EXPRESSION [] PREHOOK: query: explain vectorization detail select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = CAST( concat(c3, c4) AS STRING)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringGroupConcatColCol(col 0:string, col 1:string) -> 9:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 10:varchar(30), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringGroupConcatColCol(col 0:string, col 1:string) -> 11:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 12:varchar(30)) -> 13:boolean Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs 
Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238val_238 238val_238 true PREHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = CAST( upper(c4) AS STRING)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringUpper(col 1:string) -> 9:string, StringUpper(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringUpper(col 1:string) -> 11:string, StringUpper(col 3:varchar(20)) -> 12:varchar(20)) -> 13:boolean Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS 
true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### VAL_238 VAL_238 true PREHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = CAST( lower(c4) AS STRING)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLower(col 1:string) -> 9:string, StringLower(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringLower(col 1:string) -> 11:string, StringLower(col 3:varchar(20)) -> 12:varchar(20)) -> 13:boolean Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: 
false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: ascii(c2) (type: int), ascii(c4) (type: int), (ascii(c2) = ascii(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(ascii(c2)) -> 9:int, VectorUDFAdaptor(ascii(c4)) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: VectorUDFAdaptor(ascii(c2)) -> 11:int, VectorUDFAdaptor(ascii(c4)) -> 12:int) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) 
partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 118 118 true PREHOOK: query: explain vectorization detail select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: concat_ws('|', c1, c2) (type: string), concat_ws('|', c3, c4) (type: string), (concat_ws('|', c1, c2) = concat_ws('|', c3, c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 9:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 11:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), 
d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238|val_238 238|val_238 true PREHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: decode(encode(c2,'US-ASCII'),'US-ASCII') (type: string), decode(encode(c4,'US-ASCII'),'US-ASCII') (type: string), (decode(encode(c2,'US-ASCII'),'US-ASCII') = decode(encode(c4,'US-ASCII'),'US-ASCII')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [10, 12, 17] selectExpressions: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 9:binary) -> 10:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 11:binary) -> 12:string, StringGroupColEqualStringGroupColumn(col 14:string, col 16:string)(children: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 13:binary) -> 14:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 15:binary) -> 16:string) -> 17:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: instr(c2, '_') (type: int), instr(c4, '_') (type: int), (instr(c2, '_') = instr(c4, '_')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(instr(c2, '_')) -> 9:int, VectorUDFAdaptor(instr(c4, '_')) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: VectorUDFAdaptor(instr(c2, '_')) -> 11:int, VectorUDFAdaptor(instr(c4, '_')) -> 12:int) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: 
className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 4 4 true PREHOOK: query: explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: replace(c1, '_', c2) (type: string), replace(c3, '_', c4) (type: string), (replace(c1, '_', c2) = replace(c3, '_', c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(replace(c1, '_', c2)) -> 9:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(replace(c1, '_', c2)) -> 11:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink 
Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238 238 true PREHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 9:string, VectorUDFAdaptor(reverse(c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(reverse(c2)) -> 11:string, VectorUDFAdaptor(reverse(c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false 
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 832_lav 832_lav true PREHOOK: query: explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: next_day(d1, 'TU') (type: string), next_day(d4, 'WE') (type: string), (next_day(d1, 'TU') = next_day(d4, 'WE')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(next_day(d1, 'TU')) -> 9:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(next_day(d1, 'TU')) -> 11:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data 
size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [4, 7] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 2015-01-20 2017-01-18 false PREHOOK: query: explain vectorization detail select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: months_between(d1, d3) (type: double), months_between(d2, d4) (type: double), (months_between(d1, d3) = months_between(d2, d4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(months_between(d1, d3)) -> 9:double, VectorUDFAdaptor(months_between(d2, d4)) -> 10:double, DoubleColEqualDoubleColumn(col 11:double, col 12:double)(children: VectorUDFAdaptor(months_between(d1, d3)) -> 11:double, VectorUDFAdaptor(months_between(d2, d4)) -> 12:double) -> 13:boolean Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink 
Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [4, 5, 6, 7] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [double, double, double, double, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### -23.90322581 -23.90322581 true PREHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLength(col 1:string) -> 9:int, StringLength(col 3:varchar(20)) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: StringLength(col 1:string) -> 11:int, StringLength(col 3:varchar(20)) -> 12:int) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data 
size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 7 7 true PREHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: 5 (type: int), 5 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 11] selectExpressions: ConstantVectorExpression(val 5) -> 9:int, ConstantVectorExpression(val 5) -> 10:int, ConstantVectorExpression(val 1) -> 11:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 5 5 true PREHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: lpad(c2, 15, ' ') (type: string), lpad(c4, 15, ' ') (type: string), (lpad(c2, 15, ' ') = lpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 9:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 11:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: 
COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLTrim(col 1:string) -> 9:string, StringLTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringLTrim(col 1:string) -> 11:string, StringLTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false 
Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(c2 regexp 'val') -> 9:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 10:boolean, LongColEqualLongColumn(col 11:boolean, col 12:boolean)(children: VectorUDFAdaptor(c2 regexp 'val') -> 11:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 12:boolean) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: 
input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### true true true PREHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 9:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 11:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: 
VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238 238 true PREHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 
'val', 'replaced')) -> 9:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 11:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### replaced_238 replaced_238 true PREHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: 
_col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 9:string, VectorUDFAdaptor(reverse(c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(reverse(c2)) -> 11:string, VectorUDFAdaptor(reverse(c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 832_lav 832_lav true PREHOOK: query: explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: rpad(c2, 15, ' ') (type: string), rpad(c4, 15, ' ') (type: string), (rpad(c2, 15, ' ') = rpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: 
VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 9:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 11:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: 
StringRTrim(col 1:string) -> 9:string, StringRTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringRTrim(col 1:string) -> 11:string, StringRTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: sentences('See spot run. See jane run.') (type: array<array<string>>), sentences('See spot run. See jane run.') (type: array<array<string>>) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10] selectExpressions: VectorUDFAdaptor(sentences('See spot run. 
See jane run.')) -> 9:array<array<string>>, VectorUDFAdaptor(sentences('See spot run. See jane run.')) -> 10:array<array<string>> Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [array<array<string>>, array<array<string>>] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' 
as varchar(50))) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### [["See","spot","run"],["See","jane","run"]] [["See","spot","run"],["See","jane","run"]] PREHOOK: query: explain vectorization detail select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: split(c2, '_') (type: array<string>), split(c4, '_') (type: array<string>) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10] selectExpressions: VectorUDFAdaptor(split(c2, '_')) -> 9:array<string>, VectorUDFAdaptor(split(c4, '_')) -> 10:array<string> Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [array<string>, array<string>] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### ["val","238"] ["val","238"] PREHOOK: query: explain vectorization detail select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 
#### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map<string,string>), str_to_map('a:1,b:2,c:3',',',':') (type: map<string,string>) outputColumnNames: _col0, _col1 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10] selectExpressions: VectorUDFAdaptor(str_to_map('a:1,b:2,c:3',',',':')) -> 9:map<string,string>, VectorUDFAdaptor(str_to_map('a:1,b:2,c:3',',',':')) -> 10:map<string,string> Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [map<string,string>, map<string,string>] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### {"a":"1","b":"2","c":"3"} {"a":"1","b":"2","c":"3"} PREHOOK: query: explain vectorization detail select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from 
varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 9:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 11:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val val true PREHOOK: query: explain vectorization detail select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select trim(c2), trim(c4), trim(c2) = trim(c4) 
from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringTrim(col 1:string) -> 9:string, StringTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringTrim(col 1:string) -> 11:string, StringTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1_n2 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: 
[hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: _col0, _col2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16) minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: struct), _col1 (type: struct) Execution mode: llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reducer 2 Execution mode: llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported vectorized: false Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select compute_stats(c2, 'fm', 16), compute_stats(c4, 'fm', 16) from varchar_udf_1_n2 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select compute_stats(c2, 'fm', 16), compute_stats(c4, 'fm', 16) from varchar_udf_1_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} {"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM} PREHOOK: query: explain vectorization detail select min(c2), min(c4) from varchar_udf_1_n2 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select min(c2), min(c4) from varchar_udf_1_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A 
masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3] Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(c2), min(c4) Group By Vectorization: aggregators: VectorUDAFMinString(col 1:string) -> string, VectorUDAFMinString(col 3:varchar(20)) -> varchar(20) className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:varchar(20) Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true reduceColumnNullOrder: reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 dataColumns: VALUE._col0:string, VALUE._col1:varchar(20) partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: min(VALUE._col0), min(VALUE._col1) Group By Vectorization: aggregators: VectorUDAFMinString(col 0:string) -> string, VectorUDAFMinString(col 1:varchar(20)) -> varchar(20) className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select min(c2), min(c4) from varchar_udf_1_n2 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select min(c2), min(c4) from varchar_udf_1_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 PREHOOK: query: explain vectorization detail select max(c2), max(c4) from varchar_udf_1_n2 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select max(c2), max(c4) from varchar_udf_1_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Edges: Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: c2 (type: string), c4 (type: varchar(20)) outputColumnNames: c2, c4 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 3] Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(c2), max(c4) Group By Vectorization: aggregators: VectorUDAFMaxString(col 1:string) -> string, VectorUDAFMaxString(col 3:varchar(20)) -> varchar(20) className: VectorGroupByOperator groupByMode: HASH native: false vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1] minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator null sort order: sort order: Reduce Sink Vectorization: className: VectorReduceSinkEmptyKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:string, 1:varchar(20) Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: string), _col1 (type: varchar(20)) Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 
scratchColumnTypeNames: [] Reducer 2 Execution mode: vectorized, llap Reduce Vectorization: enabled: true enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true reduceColumnNullOrder: reduceColumnSortOrder: allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 2 dataColumns: VALUE._col0:string, VALUE._col1:varchar(20) partitionColumnCount: 0 scratchColumnTypeNames: [] Reduce Operator Tree: Group By Operator aggregations: max(VALUE._col0), max(VALUE._col1) Group By Vectorization: aggregators: VectorUDAFMaxString(col 0:string) -> string, VectorUDAFMaxString(col 1:varchar(20)) -> varchar(20) className: VectorGroupByOperator groupByMode: MERGEPARTIAL native: false vectorProcessingMode: GLOBAL projectedOutputColumnNums: [0, 1] mode: mergepartial outputColumnNames: _col0, _col1 Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Stage: Stage-0 Fetch Operator limit: -1 Processor Tree: ListSink PREHOOK: query: select max(c2), max(c4) from varchar_udf_1_n2 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select max(c2), max(c4) from varchar_udf_1_n2 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 PREHOOK: query: drop table varchar_udf_1_n2 PREHOOK: type: DROPTABLE PREHOOK: Input: default@varchar_udf_1_n2 PREHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: query: drop table varchar_udf_1_n2 POSTHOOK: type: DROPTABLE POSTHOOK: Input: default@varchar_udf_1_n2 POSTHOOK: Output: default@varchar_udf_1_n2 +PREHOOK: query: drop table varchar_udf_1_n2 PREHOOK: type: DROPTABLE POSTHOOK: query: drop table varchar_udf_1_n2 POSTHOOK: type: DROPTABLE PREHOOK: query: create table varchar_udf_1_n2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20), d1 string, d2 string, d3 varchar(10), d4 varchar(10)) STORED AS ORC PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: query: create table varchar_udf_1_n2 (c1 string, c2 string, c3 varchar(10), c4 varchar(20), d1 string, d2 string, d3 varchar(10), d4 varchar(10)) STORED AS ORC POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default POSTHOOK: Output: default@varchar_udf_1_n2 PREHOOK: query: insert overwrite table varchar_udf_1_n2 select key, value, key, value, '2015-01-14', '2015-01-14', '2017-01-11', '2017-01-11' from src where key = '238' limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@src PREHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: query: insert overwrite table varchar_udf_1_n2 select key, value, key, value, '2015-01-14', '2015-01-14', '2017-01-11', '2017-01-11' from src where key = '238' limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@src POSTHOOK: Output: default@varchar_udf_1_n2 POSTHOOK: Lineage: varchar_udf_1_n2.c1 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1_n2.c2 SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1_n2.c3 EXPRESSION [] POSTHOOK: Lineage: 
varchar_udf_1_n2.c4 EXPRESSION [(src)src.FieldSchema(name:value, type:string, comment:default), ] POSTHOOK: Lineage: varchar_udf_1_n2.d1 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1_n2.d2 SIMPLE [] POSTHOOK: Lineage: varchar_udf_1_n2.d3 EXPRESSION [] POSTHOOK: Lineage: varchar_udf_1_n2.d4 EXPRESSION [] PREHOOK: query: explain vectorization detail select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: concat(c1, c2) (type: string), concat(c3, c4) (type: varchar(30)), (concat(c1, c2) = CAST( concat(c3, c4) AS STRING)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringGroupConcatColCol(col 0:string, col 1:string) -> 9:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 10:varchar(30), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringGroupConcatColCol(col 0:string, col 1:string) -> 11:string, StringGroupConcatColCol(col 2:varchar(10), col 3:varchar(20)) -> 12:varchar(30)) -> 13:boolean Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 302 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) 
from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select concat(c1, c2), concat(c3, c4), concat(c1, c2) = concat(c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238val_238 238val_238 true PREHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: upper(c2) (type: string), upper(c4) (type: varchar(20)), (upper(c2) = CAST( upper(c4) AS STRING)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringUpper(col 1:string) -> 9:string, StringUpper(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringUpper(col 1:string) -> 11:string, StringUpper(col 3:varchar(20)) -> 12:varchar(20)) -> 13:boolean Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 
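Note on the upper() plan above (and the analogous lower() plan that follows): both compile to dedicated vector expressions (StringUpper, StringLower), so usesVectorUDFAdaptor stays false and the plan remains fully native. The sketch below is illustrative only, not Hive's StringUpper implementation; the helper name upperCaseBatch is made up, while BytesColumnVector and VectorizedRowBatch are the real vectorization classes. It shows the batch-at-a-time pattern such an expression follows: one call transforms every selected row of a column batch, with no per-row ObjectInspector traffic.

import java.nio.charset.StandardCharsets;
import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;

public class UpperCaseBatchSketch {
  /** Upper-cases every selected row of 'in' into 'out', propagating nulls. */
  static void upperCaseBatch(VectorizedRowBatch batch, BytesColumnVector in, BytesColumnVector out) {
    out.initBuffer();                           // reuse the output byte buffer across batches
    for (int j = 0; j < batch.size; j++) {
      int i = batch.selectedInUse ? batch.selected[j] : j;
      if (!in.noNulls && in.isNull[i]) {        // null in, null out
        out.isNull[i] = true;
        out.noNulls = false;
        continue;
      }
      String s = new String(in.vector[i], in.start[i], in.length[i], StandardCharsets.UTF_8);
      byte[] up = s.toUpperCase().getBytes(StandardCharsets.UTF_8);
      out.setVal(i, up, 0, up.length);          // one array write per row, no boxing
    }
  }
}

The production expression works directly on the UTF-8 bytes and special-cases isRepeating batches; the String round-trip here is only for brevity.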
POSTHOOK: query: select upper(c2), upper(c4), upper(c2) = upper(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### VAL_238 VAL_238 true PREHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: lower(c2) (type: string), lower(c4) (type: varchar(20)), (lower(c2) = CAST( lower(c4) AS STRING)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLower(col 1:string) -> 9:string, StringLower(col 3:varchar(20)) -> 10:varchar(20), StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringLower(col 1:string) -> 11:string, StringLower(col 3:varchar(20)) -> 12:varchar(20)) -> 13:boolean Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 292 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select lower(c2), lower(c4), lower(c2) = lower(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: 
default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: ascii(c2) (type: int), ascii(c4) (type: int), (ascii(c2) = ascii(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(ascii(c2)) -> 9:int, VectorUDFAdaptor(ascii(c4)) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: VectorUDFAdaptor(ascii(c2)) -> 11:int, VectorUDFAdaptor(ascii(c4)) -> 12:int) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select ascii(c2), ascii(c4), ascii(c2) = ascii(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 118 118 true PREHOOK: query: explain vectorization detail select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from 
varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: concat_ws('|', c1, c2) (type: string), concat_ws('|', c3, c4) (type: string), (concat_ws('|', c1, c2) = concat_ws('|', c3, c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 9:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(concat_ws('|', c1, c2)) -> 11:string, VectorUDFAdaptor(concat_ws('|', c3, c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select concat_ws('|', c1, c2), concat_ws('|', c3, c4), concat_ws('|', c1, c2) = concat_ws('|', c3, c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238|val_238 238|val_238 true PREHOOK: query: 
explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: decode(encode(c2,'US-ASCII'),'US-ASCII') (type: string), decode(encode(c4,'US-ASCII'),'US-ASCII') (type: string), (decode(encode(c2,'US-ASCII'),'US-ASCII') = decode(encode(c4,'US-ASCII'),'US-ASCII')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [10, 12, 17] selectExpressions: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 9:binary) -> 10:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 11:binary) -> 12:string, StringGroupColEqualStringGroupColumn(col 14:string, col 16:string)(children: VectorUDFAdaptor(decode(encode(c2,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c2,'US-ASCII')) -> 13:binary) -> 14:string, VectorUDFAdaptor(decode(encode(c4,'US-ASCII'),'US-ASCII'))(children: VectorUDFAdaptor(encode(c4,'US-ASCII')) -> 15:binary) -> 16:string) -> 17:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) 
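By contrast, the ascii(), concat_ws(), and decode(encode(...)) plans all report usesVectorUDFAdaptor: true: no native vector expression exists for them, so VectorUDFAdaptor wraps the ordinary row-mode GenericUDF and invokes it once per row of the batch. A toy row-mode UDF (hypothetical, not part of this patch) shows the contract the adaptor drives — initialize() once to fix the output type, then evaluate() per row:

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.io.Text;

/** Toy row-mode UDF (hypothetical): reverses its single string argument. */
public class GenericUDFToyReverse extends GenericUDF {
  private final Text result = new Text();

  @Override
  public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
    // Called once per query; the adaptor relies on this to learn the output type.
    return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
  }

  @Override
  public Object evaluate(DeferredObject[] arguments) throws HiveException {
    Object o = arguments[0].get();              // the adaptor calls this once per row
    if (o == null) {
      return null;
    }
    result.set(new StringBuilder(o.toString()).reverse().toString());
    return result;
  }

  @Override
  public String getDisplayString(String[] children) {
    return getStandardDisplayString("toy_reverse", children);
  }
}

Each evaluate() call goes through DeferredObject and ObjectInspector boxing, which is exactly the per-row overhead the native expressions in the earlier plans avoid; it is also why these plans show allNative: false.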
partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select decode(encode(c2, 'US-ASCII'), 'US-ASCII'), decode(encode(c4, 'US-ASCII'), 'US-ASCII'), decode(encode(c2, 'US-ASCII'), 'US-ASCII') = decode(encode(c4, 'US-ASCII'), 'US-ASCII') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: instr(c2, '_') (type: int), instr(c4, '_') (type: int), (instr(c2, '_') = instr(c4, '_')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(instr(c2, '_')) -> 9:int, VectorUDFAdaptor(instr(c4, '_')) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: VectorUDFAdaptor(instr(c2, '_')) -> 11:int, VectorUDFAdaptor(instr(c4, '_')) -> 12:int) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 
8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select instr(c2, '_'), instr(c4, '_'), instr(c2, '_') = instr(c4, '_') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 4 4 true PREHOOK: query: explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 356 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: replace(c1, '_', c2) (type: string), replace(c3, '_', c4) (type: string), (replace(c1, '_', c2) = replace(c3, '_', c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(replace(c1, '_', c2)) -> 9:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(replace(c1, '_', c2)) -> 11:string, VectorUDFAdaptor(replace(c3, '_', c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: 
dataColumnCount: 8 includeColumns: [0, 1, 2, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select replace(c1, '_', c2), replace(c3, '_', c4), replace(c1, '_', c2) = replace(c3, '_', c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 238 238 true PREHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 9:string, VectorUDFAdaptor(reverse(c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(reverse(c2)) -> 11:string, VectorUDFAdaptor(reverse(c4)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, 
c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 832_lav 832_lav true PREHOOK: query: explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 188 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: next_day(d1, 'TU') (type: string), next_day(d4, 'WE') (type: string), (next_day(d1, 'TU') = next_day(d4, 'WE')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(next_day(d1, 'TU')) -> 9:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(next_day(d1, 'TU')) -> 11:string, VectorUDFAdaptor(next_day(d4, 'WE')) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [4, 7] dataColumns: c1:string, c2:string, c3:varchar(10), 
c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select next_day(d1, 'TU'), next_day(d4, 'WE'), next_day(d1, 'TU') = next_day(d4, 'WE') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 2015-01-20 2017-01-18 false PREHOOK: query: explain vectorization detail select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 376 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: months_between(d1, d3) (type: double), months_between(d2, d4) (type: double), (months_between(d1, d3) = months_between(d2, d4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(months_between(d1, d3)) -> 9:double, VectorUDFAdaptor(months_between(d2, d4)) -> 10:double, DoubleColEqualDoubleColumn(col 11:double, col 12:double)(children: VectorUDFAdaptor(months_between(d1, d3)) -> 11:double, VectorUDFAdaptor(months_between(d2, d4)) -> 12:double) -> 13:boolean Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 20 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: true vectorized: true rowBatchContext: 
dataColumnCount: 8 includeColumns: [4, 5, 6, 7] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [double, double, double, double, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select months_between(d1, d3), months_between(d2, d4), months_between(d1, d3) = months_between(d2, d4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### -23.90322581 -23.90322581 true PREHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: length(c2) (type: int), length(c4) (type: int), (length(c2) = length(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLength(col 1:string) -> 9:int, StringLength(col 3:varchar(20)) -> 10:int, LongColEqualLongColumn(col 11:int, col 12:int)(children: StringLength(col 1:string) -> 11:int, StringLength(col 3:varchar(20)) -> 12:int) -> 13:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), 
c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select length(c2), length(c4), length(c2) = length(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 7 7 true PREHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: 5 (type: int), 5 (type: int), true (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 11] selectExpressions: ConstantVectorExpression(val 5) -> 9:int, ConstantVectorExpression(val 5) -> 10:int, ConstantVectorExpression(val 1) -> 11:boolean Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) 
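The length(), ascii(), instr(), and constant-folded locate() plans above project only fixed-width outputs (two ints and a boolean), so the optimizer can report a small exact size (Data size: 12), whereas string-producing UDFs fall back to a generic per-row width. Where a function's effect on its input distribution is known, a per-UDF estimator can instead carry the input column's statistics through to the output. A minimal sketch, assuming an estimate(List<ColStatistics>) -> Optional<ColStatistics> contract and using a made-up class name; this is not an estimator shipped by this patch:

import java.util.List;
import java.util.Optional;
import org.apache.hadoop.hive.ql.plan.ColStatistics;

/**
 * Hypothetical sketch: a pass-through estimator for a case-mapping UDF such as
 * upper()/lower(), which preserves string length and never nulls a non-null input.
 */
public class CaseMappingStatEstimatorSketch {
  public Optional<ColStatistics> estimate(List<ColStatistics> argStats) {
    if (argStats.isEmpty() || argStats.get(0) == null) {
      // Without input statistics, give up and let the default NDV heuristic run.
      return Optional.empty();
    }
    ColStatistics in = argStats.get(0);
    ColStatistics out = new ColStatistics(in.getColumnName(), in.getColumnType());
    out.setAvgColLen(in.getAvgColLen());        // case mapping keeps the length
    out.setCountDistint(in.getCountDistint());  // NDV carries over (an upper bound in general)
    out.setNumNulls(in.getNumNulls());          // null in, null out
    return Optional.of(out);
  }
}

Note that the carried-over NDV is only an upper bound: upper() can collapse values that differ solely by case, so a conservative consumer should treat it as such.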
partitionColumnCount: 0 scratchColumnTypeNames: [bigint, bigint, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select locate('a', 'abcdabcd', 3), locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3), locate('a', 'abcdabcd', 3) = locate(cast('a' as varchar(1)), cast('abcdabcd' as varchar(10)), 3) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### 5 5 true PREHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: lpad(c2, 15, ' ') (type: string), lpad(c4, 15, ' ') (type: string), (lpad(c2, 15, ' ') = lpad(c4, 15, ' ')) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 9:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(lpad(c2, 15, ' ')) -> 11:string, VectorUDFAdaptor(lpad(c4, 15, ' ')) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false 
usesVectorUDFAdaptor: true vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10) partitionColumnCount: 0 scratchColumnTypeNames: [string, string, string, string, bigint] Stage: Stage-0 Fetch Operator limit: 1 Processor Tree: ListSink PREHOOK: query: select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: select lpad(c2, 15, ' '), lpad(c4, 15, ' '), lpad(c2, 15, ' ') = lpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### val_238 val_238 true PREHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1 PREHOOK: type: QUERY PREHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### POSTHOOK: query: explain vectorization detail select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1 POSTHOOK: type: QUERY POSTHOOK: Input: default@varchar_udf_1_n2 #### A masked pattern was here #### PLAN VECTORIZATION: enabled: true enabledConditionsMet: [hive.vectorized.execution.enabled IS true] STAGE DEPENDENCIES: Stage-1 is a root stage Stage-0 depends on stages: Stage-1 STAGE PLANS: Stage: Stage-1 Tez #### A masked pattern was here #### Vertices: Map 1 Map Operator Tree: TableScan alias: varchar_udf_1_n2 Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct] Select Operator expressions: ltrim(c2) (type: string), ltrim(c4) (type: string), (ltrim(c2) = ltrim(c4)) (type: boolean) outputColumnNames: _col0, _col1, _col2 Select Vectorization: className: VectorSelectOperator native: true projectedOutputColumnNums: [9, 10, 13] selectExpressions: StringLTrim(col 1:string) -> 9:string, StringLTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringLTrim(col 1:string) -> 11:string, StringLTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Limit Vectorization: className: VectorLimitOperator native: true Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: enabled: true enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true inputFormatFeatureSupport: [DECIMAL_64] featureSupportInUse: [DECIMAL_64] inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat allNative: false usesVectorUDFAdaptor: false vectorized: true rowBatchContext: dataColumnCount: 8 includeColumns: [1, 3] dataColumns: 
c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select ltrim(c2), ltrim(c4), ltrim(c2) = ltrim(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val_238	val_238	true
PREHOOK: query: explain vectorization detail
select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: c2 regexp 'val' (type: boolean), c4 regexp 'val' (type: boolean), (c2 regexp 'val' = c4 regexp 'val') (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: VectorUDFAdaptor(c2 regexp 'val') -> 9:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 10:boolean, LongColEqualLongColumn(col 11:boolean, col 12:boolean)(children: VectorUDFAdaptor(c2 regexp 'val') -> 11:boolean, VectorUDFAdaptor(c4 regexp 'val') -> 12:boolean) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 12 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [bigint, bigint, bigint, bigint, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select c2 regexp 'val', c4 regexp 'val', (c2 regexp 'val') = (c4 regexp 'val') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
true	true	true
PREHOOK: query: explain vectorization detail
select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: regexp_extract(c2, 'val_([0-9]+)', 1) (type: string), regexp_extract(c4, 'val_([0-9]+)', 1) (type: string), (regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 9:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(regexp_extract(c2, 'val_([0-9]+)', 1)) -> 11:string, VectorUDFAdaptor(regexp_extract(c4, 'val_([0-9]+)', 1)) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select regexp_extract(c2, 'val_([0-9]+)', 1), regexp_extract(c4, 'val_([0-9]+)', 1), regexp_extract(c2, 'val_([0-9]+)', 1) = regexp_extract(c4, 'val_([0-9]+)', 1) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
238	238	true
PREHOOK: query: explain vectorization detail
select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: regexp_replace(c2, 'val', 'replaced') (type: string), regexp_replace(c4, 'val', 'replaced') (type: string), (regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced')) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 9:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(regexp_replace(c2, 'val', 'replaced')) -> 11:string, VectorUDFAdaptor(regexp_replace(c4, 'val', 'replaced')) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select regexp_replace(c2, 'val', 'replaced'), regexp_replace(c4, 'val', 'replaced'), regexp_replace(c2, 'val', 'replaced') = regexp_replace(c4, 'val', 'replaced') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
replaced_238	replaced_238	true
PREHOOK: query: explain vectorization detail
select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: reverse(c2) (type: string), reverse(c4) (type: string), (reverse(c2) = reverse(c4)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: VectorUDFAdaptor(reverse(c2)) -> 9:string, VectorUDFAdaptor(reverse(c4)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(reverse(c2)) -> 11:string, VectorUDFAdaptor(reverse(c4)) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select reverse(c2), reverse(c4), reverse(c2) = reverse(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
832_lav	832_lav	true
PREHOOK: query: explain vectorization detail
select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: rpad(c2, 15, ' ') (type: string), rpad(c4, 15, ' ') (type: string), (rpad(c2, 15, ' ') = rpad(c4, 15, ' ')) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 9:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: VectorUDFAdaptor(rpad(c2, 15, ' ')) -> 11:string, VectorUDFAdaptor(rpad(c4, 15, ' ')) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select rpad(c2, 15, ' '), rpad(c4, 15, ' '), rpad(c2, 15, ' ') = rpad(c4, 15, ' ') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val_238	val_238	true
PREHOOK: query: explain vectorization detail
select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: rtrim(c2) (type: string), rtrim(c4) (type: string), (rtrim(c2) = rtrim(c4)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: StringRTrim(col 1:string) -> 9:string, StringRTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringRTrim(col 1:string) -> 11:string, StringRTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select rtrim(c2), rtrim(c4), rtrim(c2) = rtrim(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val_238	val_238	true
PREHOOK: query: explain vectorization detail
select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: sentences('See spot run. See jane run.') (type: array<array<string>>), sentences('See spot run. See jane run.') (type: array<array<string>>)
                    outputColumnNames: _col0, _col1
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10]
                        selectExpressions: VectorUDFAdaptor(sentences('See spot run. See jane run.')) -> 9:array<array<string>>, VectorUDFAdaptor(sentences('See spot run. See jane run.')) -> 10:array<array<string>>
                    Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 112 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: []
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [array<array<string>>, array<array<string>>]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select sentences('See spot run. See jane run.'), sentences(cast('See spot run. See jane run.' as varchar(50))) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
[["See","spot","run"],["See","jane","run"]]	[["See","spot","run"],["See","jane","run"]]
PREHOOK: query: explain vectorization detail
select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: split(c2, '_') (type: array<string>), split(c4, '_') (type: array<string>)
                    outputColumnNames: _col0, _col1
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10]
                        selectExpressions: VectorUDFAdaptor(split(c2, '_')) -> 9:array<string>, VectorUDFAdaptor(split(c4, '_')) -> 10:array<string>
                    Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 3840 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [array<string>, array<string>]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select split(c2, '_'), split(c4, '_') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
["val","238"]	["val","238"]
PREHOOK: query: explain vectorization detail
select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 732 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: str_to_map('a:1,b:2,c:3',',',':') (type: map<string,string>), str_to_map('a:1,b:2,c:3',',',':') (type: map<string,string>)
                    outputColumnNames: _col0, _col1
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10]
                        selectExpressions: VectorUDFAdaptor(str_to_map('a:1,b:2,c:3',',',':')) -> 9:map<string,string>, VectorUDFAdaptor(str_to_map('a:1,b:2,c:3',',',':')) -> 10:map<string,string>
                    Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 1508 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: true
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: []
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [map<string,string>, map<string,string>]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select str_to_map('a:1,b:2,c:3',',',':'), str_to_map(cast('a:1,b:2,c:3' as varchar(20)),',',':') from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
{"a":"1","b":"2","c":"3"}	{"a":"1","b":"2","c":"3"}
PREHOOK: query: explain vectorization detail
select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: substr(c2, 1, 3) (type: string), substr(c4, 1, 3) (type: string), (substr(c2, 1, 3) = substr(c4, 1, 3)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 9:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringSubstrColStartLen(col 1:string, start 0, length 3) -> 11:string, StringSubstrColStartLen(col 3:varchar(20), start 0, length 3) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 178 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select substr(c2, 1, 3), substr(c4, 1, 3), substr(c2, 1, 3) = substr(c4, 1, 3) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val	val	true
PREHOOK: query: explain vectorization detail
select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: trim(c2) (type: string), trim(c4) (type: string), (trim(c2) = trim(c4)) (type: boolean)
                    outputColumnNames: _col0, _col1, _col2
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [9, 10, 13]
                        selectExpressions: StringTrim(col 1:string) -> 9:string, StringTrim(col 3:varchar(20)) -> 10:string, StringGroupColEqualStringGroupColumn(col 11:string, col 12:string)(children: StringTrim(col 1:string) -> 11:string, StringTrim(col 3:varchar(20)) -> 12:string) -> 13:boolean
                    Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                    Limit
                      Number of rows: 1
                      Limit Vectorization:
                          className: VectorLimitOperator
                          native: true
                      Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                      File Output Operator
                        compressed: false
                        File Sink Vectorization:
                            className: VectorFileSinkOperator
                            native: false
                        Statistics: Num rows: 1 Data size: 372 Basic stats: COMPLETE Column stats: COMPLETE
                        table:
                            input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                            output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                            serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: [string, string, string, string, bigint]

  Stage: Stage-0
    Fetch Operator
      limit: 1
      Processor Tree:
        ListSink

PREHOOK: query: select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select trim(c2), trim(c4), trim(c2) = trim(c4) from varchar_udf_1_n2 limit 1
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val_238	val_238	true
PREHOOK: query: explain vectorization detail
select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1_n2
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select compute_stats(c2, 16), compute_stats(c4, 16) from varchar_udf_1_n2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  Select Operator
                    expressions: c2 (type: string), c4 (type: varchar(20))
                    outputColumnNames: _col0, _col2
                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: compute_stats(_col0, 16), compute_stats(_col2, 16)
                      minReductionHashAggr: 0.0
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        null sort order: 
                        sort order: 
                        Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: struct), _col1 (type: struct)
            Execution mode: llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
                vectorized: false
        Reducer 2
            Execution mode: llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
                notVectorizedReason: Aggregation Function expression for GROUPBY operator: UDF compute_stats not supported
                vectorized: false
            Reduce Operator Tree:
              Group By Operator
                aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  Statistics: Num rows: 1 Data size: 880 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select compute_stats(c2, 'fm', 16), compute_stats(c4, 'fm', 16) from varchar_udf_1_n2
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select compute_stats(c2, 'fm', 16), compute_stats(c4, 'fm', 16) from varchar_udf_1_n2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM}	{"columntype":"String","maxlength":7,"avglength":7.0,"countnulls":0,"numdistinctvalues":1,"ndvbitvector":FM}
PREHOOK: query: explain vectorization detail
select min(c2), min(c4) from varchar_udf_1_n2
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select min(c2), min(c4) from varchar_udf_1_n2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: c2 (type: string), c4 (type: varchar(20))
                    outputColumnNames: c2, c4
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [1, 3]
                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: min(c2), min(c4)
                      Group By Vectorization:
                          aggregators: VectorUDAFMinString(col 1:string) -> string, VectorUDAFMinString(col 3:varchar(20)) -> varchar(20)
                          className: VectorGroupByOperator
                          groupByMode: HASH
                          native: false
                          vectorProcessingMode: HASH
                          projectedOutputColumnNums: [0, 1]
                      minReductionHashAggr: 0.0
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        null sort order: 
                        sort order: 
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkEmptyKeyOperator
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                            valueColumns: 0:string, 1:varchar(20)
                        Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: string), _col1 (type: varchar(20))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
                reduceColumnNullOrder: 
                reduceColumnSortOrder: 
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 2
                    dataColumns: VALUE._col0:string, VALUE._col1:varchar(20)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: min(VALUE._col0), min(VALUE._col1)
                Group By Vectorization:
                    aggregators: VectorUDAFMinString(col 0:string) -> string, VectorUDAFMinString(col 1:varchar(20)) -> varchar(20)
                    className: VectorGroupByOperator
                    groupByMode: MERGEPARTIAL
                    native: false
                    vectorProcessingMode: GLOBAL
                    projectedOutputColumnNums: [0, 1]
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
                  Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select min(c2), min(c4) from varchar_udf_1_n2
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select min(c2), min(c4) from varchar_udf_1_n2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val_238	val_238
PREHOOK: query: explain vectorization detail
select max(c2), max(c4) from varchar_udf_1_n2
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: explain vectorization detail
select max(c2), max(c4) from varchar_udf_1_n2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
PLAN VECTORIZATION:
  enabled: true
  enabledConditionsMet: [hive.vectorized.execution.enabled IS true]

STAGE DEPENDENCIES:
  Stage-1 is a root stage
  Stage-0 depends on stages: Stage-1

STAGE PLANS:
  Stage: Stage-1
    Tez
#### A masked pattern was here ####
      Edges:
        Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
#### A masked pattern was here ####
      Vertices:
        Map 1
            Map Operator Tree:
                TableScan
                  alias: varchar_udf_1_n2
                  Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                  TableScan Vectorization:
                      native: true
                      vectorizationSchemaColumns: [0:c1:string, 1:c2:string, 2:c3:varchar(10), 3:c4:varchar(20), 4:d1:string, 5:d2:string, 6:d3:varchar(10), 7:d4:varchar(10), 8:ROW__ID:struct<writeid:bigint,bucketid:int,rowid:bigint>]
                  Select Operator
                    expressions: c2 (type: string), c4 (type: varchar(20))
                    outputColumnNames: c2, c4
                    Select Vectorization:
                        className: VectorSelectOperator
                        native: true
                        projectedOutputColumnNums: [1, 3]
                    Statistics: Num rows: 1 Data size: 182 Basic stats: COMPLETE Column stats: COMPLETE
                    Group By Operator
                      aggregations: max(c2), max(c4)
                      Group By Vectorization:
                          aggregators: VectorUDAFMaxString(col 1:string) -> string, VectorUDAFMaxString(col 3:varchar(20)) -> varchar(20)
                          className: VectorGroupByOperator
                          groupByMode: HASH
                          native: false
                          vectorProcessingMode: HASH
                          projectedOutputColumnNums: [0, 1]
                      minReductionHashAggr: 0.0
                      mode: hash
                      outputColumnNames: _col0, _col1
                      Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                      Reduce Output Operator
                        null sort order: 
                        sort order: 
                        Reduce Sink Vectorization:
                            className: VectorReduceSinkEmptyKeyOperator
                            native: true
                            nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
                            valueColumns: 0:string, 1:varchar(20)
                        Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                        value expressions: _col0 (type: string), _col1 (type: varchar(20))
            Execution mode: vectorized, llap
            LLAP IO: all inputs
            Map Vectorization:
                enabled: true
                enabledConditionsMet: hive.vectorized.use.vectorized.input.format IS true
                inputFormatFeatureSupport: [DECIMAL_64]
                featureSupportInUse: [DECIMAL_64]
                inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 8
                    includeColumns: [1, 3]
                    dataColumns: c1:string, c2:string, c3:varchar(10), c4:varchar(20), d1:string, d2:string, d3:varchar(10), d4:varchar(10)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
        Reducer 2
            Execution mode: vectorized, llap
            Reduce Vectorization:
                enabled: true
                enableConditionsMet: hive.vectorized.execution.reduce.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true
                reduceColumnNullOrder: 
                reduceColumnSortOrder: 
                allNative: false
                usesVectorUDFAdaptor: false
                vectorized: true
                rowBatchContext:
                    dataColumnCount: 2
                    dataColumns: VALUE._col0:string, VALUE._col1:varchar(20)
                    partitionColumnCount: 0
                    scratchColumnTypeNames: []
            Reduce Operator Tree:
              Group By Operator
                aggregations: max(VALUE._col0), max(VALUE._col1)
                Group By Vectorization:
                    aggregators: VectorUDAFMaxString(col 0:string) -> string, VectorUDAFMaxString(col 1:varchar(20)) -> varchar(20)
                    className: VectorGroupByOperator
                    groupByMode: MERGEPARTIAL
                    native: false
                    vectorProcessingMode: GLOBAL
                    projectedOutputColumnNums: [0, 1]
                mode: mergepartial
                outputColumnNames: _col0, _col1
                Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                File Output Operator
                  compressed: false
                  File Sink Vectorization:
                      className: VectorFileSinkOperator
                      native: false
                  Statistics: Num rows: 1 Data size: 288 Basic stats: COMPLETE Column stats: COMPLETE
                  table:
                      input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                      output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                      serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe

  Stage: Stage-0
    Fetch Operator
      limit: -1
      Processor Tree:
        ListSink

PREHOOK: query: select max(c2), max(c4) from varchar_udf_1_n2
PREHOOK: type: QUERY
PREHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
POSTHOOK: query: select max(c2), max(c4) from varchar_udf_1_n2
POSTHOOK: type: QUERY
POSTHOOK: Input: default@varchar_udf_1_n2
#### A masked pattern was here ####
val_238	val_238
PREHOOK: query: drop table varchar_udf_1_n2
PREHOOK: type: DROPTABLE
PREHOOK: Input: default@varchar_udf_1_n2
PREHOOK: Output: default@varchar_udf_1_n2
POSTHOOK: query: drop table varchar_udf_1_n2
POSTHOOK: type: DROPTABLE
POSTHOOK: Input: default@varchar_udf_1_n2
POSTHOOK: Output: default@varchar_udf_1_n2
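Editor's note on the numbers above: with hive.stats.use.statestimators enabled, the planner can derive a UDF projection's column statistics from the statistics of its arguments instead of falling back to a generic NDV guess, which is what moves the Data size figures in these golden files. As a rough, hypothetical illustration only (the class name and the clamping rule below are invented here, not taken from the patch), an estimator for a trim-like string UDF could look like this, following the estimate(List<ColStatistics>) -> Optional<ColStatistics> shape that StatsUtils calls:

import java.util.List;
import java.util.Optional;

import org.apache.hadoop.hive.ql.plan.ColStatistics;

// Hypothetical sketch, not shipped by this patch.
public class TrimLikeStatEstimator {

  public Optional<ColStatistics> estimate(List<ColStatistics> argStats) {
    if (argStats.isEmpty() || argStats.get(0) == null) {
      // No usable argument statistics: report nothing so the caller can
      // fall back to its generic NDV estimation.
      return Optional.empty();
    }
    ColStatistics arg = argStats.get(0);
    ColStatistics ret = new ColStatistics(arg.getColumnName(), arg.getColumnType());
    // A trim-like UDF never lengthens a value, so the argument's average
    // length is a safe upper bound for the result column.
    ret.setAvgColLen(arg.getAvgColLen());
    // Equal inputs map to equal outputs, so the NDV can only shrink;
    // carrying it over keeps the estimate conservative. Nulls stay nulls.
    ret.setCountDistint(arg.getCountDistint());
    ret.setNumNulls(arg.getNumNulls());
    return Optional.of(ret);
  }
}
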
diff --git ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
index 0077f08..0e0058b 100644
--- ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out
@@ -274,7 +274,7 @@
                         native: true
                         projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17]
                         selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestamp, col 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp
-                    Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+                    Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
                     Reduce Output Operator
                       key expressions: _col0 (type: bigint)
                       null sort order: z
@@ -283,7 +283,7 @@
                         className: VectorReduceSinkObjectHashOperator
                         native: true
                         nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
-                      Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+                      Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
                       value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
             Execution mode: vectorized, llap
             LLAP IO: all inputs
@@ -312,13 +312,13 @@
                     className: VectorSelectOperator
                     native: true
                     projectedOutputColumnNums: [0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15]
-                Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
                 File Output Operator
                   compressed: false
                   File Sink Vectorization:
                       className: VectorFileSinkOperator
                       native: false
-                  Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
                   table:
                       input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                       output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/llap/w1.q.out ql/src/test/results/clientpositive/llap/w1.q.out
new file mode 100644
index 0000000..711d42f
--- /dev/null
+++ ql/src/test/results/clientpositive/llap/w1.q.out
@@ -0,0 +1,127 @@
+PREHOOK: query: create table t (a string)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@t
+POSTHOOK: query: create table t (a string)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@t
+PREHOOK: query: insert into t values
+('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),
+('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),
+('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@t
+POSTHOOK: query: insert into t values
+('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),
+('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa'),
+('aaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaaa')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@t
+POSTHOOK: Lineage: t.a SCRIPT []
+PREHOOK: query: select a from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select a from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+PREHOOK: query: explain analyze
+select a from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain analyze
+select a from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t
+                  Statistics: Num rows: 3/3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: a (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3/3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 3/3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: select substr(a,1,4) from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: select substr(a,1,4) from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+PREHOOK: query: explain analyze
+select substr(a,1,4) from t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@t
+#### A masked pattern was here ####
+POSTHOOK: query: explain analyze
+select substr(a,1,4) from t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@t
+#### A masked pattern was here ####
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Tez
+#### A masked pattern was here ####
+      Vertices:
+        Map 1
+            Map Operator Tree:
+                TableScan
+                  alias: t
+                  Statistics: Num rows: 3/3 Data size: 360 Basic stats: COMPLETE Column stats: COMPLETE
+                  Select Operator
+                    expressions: substr(a, 1, 4) (type: string)
+                    outputColumnNames: _col0
+                    Statistics: Num rows: 3/3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                    File Output Operator
+                      compressed: false
+                      Statistics: Num rows: 3/3 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE
+                      table:
+                          input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                          output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                          serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+            Execution mode: vectorized, llap
+            LLAP IO: no inputs
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
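The new w1.q.out above doubles as a small worked example of the estimator effect. Under explain analyze, Num rows prints as actual/estimated (3/3 here), and Data size is the estimated row count times the estimated column width: 360 bytes is 3 x 120 for the raw 36-character strings, while the substr(a,1,4) projection drops to 264, i.e. 3 x 88, consistent with the output width being capped by the requested substring length of 4 rather than inheriting the full input width.
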
diff --git ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
index c55ef0f..8387880 100644
--- ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
+++ ql/src/test/results/clientpositive/merge_dynamic_partition4.q.out
@@ -140,14 +140,13 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc_n1@ds=2008-04-08/hr=12
 STAGE DEPENDENCIES:
   Stage-1 is a root stage
-  Stage-2 depends on stages: Stage-1
-  Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6
-  Stage-5
-  Stage-0 depends on stages: Stage-5, Stage-4, Stage-7
-  Stage-3 depends on stages: Stage-0
+  Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5
   Stage-4
-  Stage-6
-  Stage-7 depends on stages: Stage-6
+  Stage-0 depends on stages: Stage-4, Stage-3, Stage-6
+  Stage-2 depends on stages: Stage-0
+  Stage-3
+  Stage-5
+  Stage-6 depends on stages: Stage-5
 
 STAGE PLANS:
   Stage: Stage-1
@@ -160,79 +159,56 @@
           Select Operator
             expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 2.0D) = 0.0D), 'a1', 'b1') (type: string)
             outputColumnNames: _col0, _col1, _col2
-            Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 1000 Data size: 264000 Basic stats: COMPLETE Column stats: COMPLETE
+              table:
+                  input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
+                  name: default.merge_dynamic_part_n3
             Select Operator
               expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string)
              outputColumnNames: key, value, ds, hr
-              Statistics: Num rows: 1000 Data size: 456000 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1000 Data size: 358000 Basic stats: COMPLETE Column stats: COMPLETE
               Group By Operator
                 aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll')
                 keys: ds (type: string), hr (type: string)
                 minReductionHashAggr: 0.99
                 mode: hash
                 outputColumnNames: _col0, _col1, _col2, _col3
-                Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+                Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                 Reduce Output Operator
                   key expressions: _col0 (type: string), _col1 (type: string)
                   null sort order: zz
                   sort order: ++
                   Map-reduce partition columns: _col0 (type: string), _col1 (type: string)
-                  Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+                  Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
                   value expressions: _col2 (type: struct), _col3 (type: struct)
-          File Output Operator
-            compressed: false
-            table:
-                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
       Reduce Operator Tree:
         Group By Operator
           aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1)
          keys: KEY._col0 (type: string), KEY._col1 (type: string)
           mode: mergepartial
           outputColumnNames: _col0, _col1, _col2, _col3
-          Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+          Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
           Select Operator
            expressions: _col2 (type: struct), _col3 (type: struct), _col0 (type: string), _col1 (type: string)
             outputColumnNames: _col0, _col1, _col2, _col3
-            Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+            Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
             File Output Operator
               compressed: false
-              Statistics: Num rows: 316 Data size: 365928 Basic stats: COMPLETE Column stats: COMPLETE
+              Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
               table:
                   input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                   output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                   serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
 
-  Stage: Stage-2
-    Map Reduce
-      Map Operator Tree:
-          TableScan
-            Reduce Output Operator
-              key expressions: _col2 (type: string)
-              null sort order: a
-              sort order: +
-              Map-reduce partition columns: _col2 (type: string)
-              Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
-              value expressions: _col0 (type: string), _col1 (type: string)
-      Reduce Operator Tree:
-        Select Operator
-          expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string)
-          outputColumnNames: _col0, _col1, _col2
-          File Output Operator
-            compressed: false
-            Dp Sort State: PARTITION_SORTED
-            Statistics: Num rows: 1000 Data size: 362000 Basic stats: COMPLETE Column stats: COMPLETE
-            table:
-                input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-                output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat
-                serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
-                name: default.merge_dynamic_part_n3
-
-  Stage: Stage-8
+  Stage: Stage-7
     Conditional Operator
 
-  Stage: Stage-5
+  Stage: Stage-4
     Move Operator
       files:
           hdfs directory: true
@@ -251,7 +227,7 @@
                 serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe
                 name: default.merge_dynamic_part_n3
 
-  Stage: Stage-3
+  Stage: Stage-2
     Stats Work
       Basic Stats Work:
       Column Stats Desc:
@@ -259,7 +235,14 @@
           Column Types: string, string
           Table: default.merge_dynamic_part_n3
 
-  Stage: Stage-4
+  Stage: Stage-3
+    Merge File Operator
+      Map Operator Tree:
+          RCFile Merge Operator
+      merge level: block
+      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
+
+  Stage: Stage-5
     Merge File Operator
       Map Operator Tree:
           RCFile Merge Operator
@@ -267,13 +250,6 @@
       input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
 
   Stage: Stage-6
-    Merge File Operator
-      Map Operator Tree:
-          RCFile Merge Operator
-      merge level: block
-      input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat
-
-  Stage: Stage-7
     Move Operator
       files:
           hdfs directory: true
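The merge_dynamic_partition4 hunks above (and the merge_dynamic_partition5 hunks below) appear to be knock-on effects of the same statistics change rather than an independent rewrite: with complete column stats the compute_stats group-by is now estimated at one row instead of 316 (or 309), the map writes the RCFile output directly, the separate shuffle stage drops out, and the remaining stages are renumbered accordingly.
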
ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out index 251c3f8..518f400 100644 --- ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out +++ ql/src/test/results/clientpositive/merge_dynamic_partition5.q.out @@ -116,14 +116,13 @@ POSTHOOK: Input: default@srcpart_merge_dp_rc@ds=2008-04-08/hr=12 STAGE DEPENDENCIES: Stage-1 is a root stage - Stage-2 depends on stages: Stage-1 - Stage-8 depends on stages: Stage-2 , consists of Stage-5, Stage-4, Stage-6 - Stage-5 - Stage-0 depends on stages: Stage-5, Stage-4, Stage-7 - Stage-3 depends on stages: Stage-0 + Stage-7 depends on stages: Stage-1 , consists of Stage-4, Stage-3, Stage-5 Stage-4 - Stage-6 - Stage-7 depends on stages: Stage-6 + Stage-0 depends on stages: Stage-4, Stage-3, Stage-6 + Stage-2 depends on stages: Stage-0 + Stage-3 + Stage-5 + Stage-6 depends on stages: Stage-5 STAGE PLANS: Stage: Stage-1 @@ -136,79 +135,56 @@ Select Operator expressions: key (type: string), value (type: string), if(((UDFToDouble(key) % 100.0D) = 0.0D), 'a1', 'b1') (type: string) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 163152 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 618 Data size: 163152 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat + serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe + name: default.merge_dynamic_part Select Operator expressions: _col0 (type: string), _col1 (type: string), '2008-04-08' (type: string), _col2 (type: string) outputColumnNames: key, value, ds, hr - Statistics: Num rows: 618 Data size: 281808 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 618 Data size: 221244 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: compute_stats(key, 'hll'), compute_stats(value, 'hll') keys: ds (type: string), hr (type: string) minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: string) null sort order: zz sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: string) - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: struct), _col3 (type: struct) - File Output Operator - compressed: false - table: - input format: org.apache.hadoop.mapred.SequenceFileInputFormat - output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat - serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) keys: KEY._col0 (type: string), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col2 (type: struct), _col3 (type: struct), _col0 
(type: string), _col1 (type: string) outputColumnNames: _col0, _col1, _col2, _col3 - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 309 Data size: 357822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe - Stage: Stage-2 - Map Reduce - Map Operator Tree: - TableScan - Reduce Output Operator - key expressions: _col2 (type: string) - null sort order: a - sort order: + - Map-reduce partition columns: _col2 (type: string) - Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: string), _col1 (type: string) - Reduce Operator Tree: - Select Operator - expressions: VALUE._col0 (type: string), VALUE._col1 (type: string), KEY._col2 (type: string) - outputColumnNames: _col0, _col1, _col2 - File Output Operator - compressed: false - Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 618 Data size: 223716 Basic stats: COMPLETE Column stats: COMPLETE - table: - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - output format: org.apache.hadoop.hive.ql.io.RCFileOutputFormat - serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe - name: default.merge_dynamic_part - - Stage: Stage-8 + Stage: Stage-7 Conditional Operator - Stage: Stage-5 + Stage: Stage-4 Move Operator files: hdfs directory: true @@ -227,7 +203,7 @@ serde: org.apache.hadoop.hive.serde2.columnar.ColumnarSerDe name: default.merge_dynamic_part - Stage: Stage-3 + Stage: Stage-2 Stats Work Basic Stats Work: Column Stats Desc: @@ -235,7 +211,14 @@ Column Types: string, string Table: default.merge_dynamic_part - Stage: Stage-4 + Stage: Stage-3 + Merge File Operator + Map Operator Tree: + RCFile Merge Operator + merge level: block + input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat + + Stage: Stage-5 Merge File Operator Map Operator Tree: RCFile Merge Operator @@ -243,13 +226,6 @@ input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat Stage: Stage-6 - Merge File Operator - Map Operator Tree: - RCFile Merge Operator - merge level: block - input format: org.apache.hadoop.hive.ql.io.RCFileInputFormat - - Stage: Stage-7 Move Operator files: hdfs directory: true diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out index 94bf30c..4000b81 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query19.q.out @@ -96,23 +96,23 @@ PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_34] (rows=76645658 width=314) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14 - Select Operator [SEL_33] (rows=76645658 width=650) + Select Operator [SEL_33] (rows=76645658 width=458) Output:["_col8","_col11","_col12","_col13","_col14"] - Filter Operator [FIL_32] (rows=76645658 width=650) + Filter Operator [FIL_32] (rows=76645658 width=458) predicate:(_col3 <> _col16) - Merge Join Operator [MERGEJOIN_122] (rows=76645658 width=650) + Merge Join Operator 
[MERGEJOIN_122] (rows=76645658 width=458) Conds:RS_29._col7=RS_143._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_143] PartitionCols:_col0 - Select Operator [SEL_142] (rows=1704 width=188) + Select Operator [SEL_142] (rows=1704 width=92) Output:["_col0","_col1"] TableScan [TS_21] (rows=1704 width=93) default@store,store,Tbl:COMPLETE,Col:COMPLETE,Output:["s_store_sk","s_zip"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_121] (rows=76645658 width=468) + Merge Join Operator [MERGEJOIN_121] (rows=76645658 width=372) Conds:RS_26._col0=RS_27._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_27] @@ -165,7 +165,7 @@ <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=188) + Merge Join Operator [MERGEJOIN_118] (rows=80000000 width=92) Conds:RS_125._col1=RS_127._col0(Inner),Output:["_col0","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_125] @@ -179,7 +179,7 @@ <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_127] PartitionCols:_col0 - Select Operator [SEL_126] (rows=40000000 width=188) + Select Operator [SEL_126] (rows=40000000 width=92) Output:["_col0","_col1"] TableScan [TS_3] (rows=40000000 width=93) default@customer_address,customer_address,Tbl:COMPLETE,Col:COMPLETE,Output:["ca_address_sk","ca_zip"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out index 2828fb0..2b94615 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query79.q.out @@ -72,17 +72,17 @@ Stage-1 Reducer 3 vectorized File Output Operator [FS_125] - Limit [LIM_124] (rows=100 width=776) + Limit [LIM_124] (rows=100 width=592) Number of rows:100 - Select Operator [SEL_123] (rows=479121995 width=776) + Select Operator [SEL_123] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_32] - Select Operator [SEL_31] (rows=479121995 width=776) + Select Operator [SEL_31] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_56] (rows=479121995 width=685) + Top N Key Operator [TNK_56] (rows=479121995 width=593) keys:_col2, _col1, substr(_col5, 1, 30), _col7,top n:100 - Merge Join Operator [MERGEJOIN_100] (rows=479121995 width=685) + Merge Join Operator [MERGEJOIN_100] (rows=479121995 width=593) Conds:RS_102._col0=RS_122._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_102] @@ -94,7 +94,7 @@ <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_122] PartitionCols:_col1 - Select Operator [SEL_121] (rows=479121995 width=508) + Select Operator [SEL_121] (rows=479121995 width=416) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Group By Operator [GBY_120] (rows=479121995 width=328) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out index ed1501b..06b9f6f 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out 
+++ ql/src/test/results/clientpositive/perf/tez/constraints/query8.q.out @@ -271,7 +271,7 @@ <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col2 - Select Operator [SEL_148] (rows=1704 width=276) + Select Operator [SEL_148] (rows=1704 width=178) Output:["_col0","_col1","_col2"] Filter Operator [FIL_147] (rows=1704 width=181) predicate:substr(s_zip, 1, 2) is not null @@ -280,24 +280,24 @@ <-Reducer 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_145] (rows=1 width=184) + Select Operator [SEL_145] (rows=1 width=86) Output:["_col0"] - Filter Operator [FIL_144] (rows=1 width=192) + Filter Operator [FIL_144] (rows=1 width=96) predicate:(_col1 = 2L) - Group By Operator [GBY_143] (rows=5633 width=192) + Group By Operator [GBY_143] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized Reduce Output Operator [RS_175] PartitionCols:_col0 - Group By Operator [GBY_174] (rows=5633 width=192) + Group By Operator [GBY_174] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_173] (rows=1126 width=192) + Group By Operator [GBY_173] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_172] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=1126 width=192) + Group By Operator [GBY_171] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_170] (rows=2253 width=97) Output:["_col0"] @@ -333,14 +333,14 @@ <-Reducer 9 [CONTAINS] vectorized Reduce Output Operator [RS_161] PartitionCols:_col0 - Group By Operator [GBY_160] (rows=5633 width=192) + Group By Operator [GBY_160] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_159] (rows=10141 width=192) + Group By Operator [GBY_159] (rows=10141 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_158] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=141974 width=192) + Group By Operator [GBY_157] (rows=141974 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_156] (rows=20000000 width=89) Output:["_col0"] diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out index 706bbd7..9a550b7 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query85.q.out @@ -200,13 +200,13 @@ Stage-1 Reducer 6 vectorized File Output Operator [FS_209] - Limit [LIM_208] (rows=72 width=832) + Limit [LIM_208] (rows=72 width=656) Number of rows:100 - Select Operator [SEL_207] (rows=72 width=832) + Select Operator [SEL_207] (rows=72 width=656) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_206] - Select Operator [SEL_205] (rows=72 width=832) + Select Operator [SEL_205] (rows=72 width=656) Output:["_col4","_col5","_col6","_col7"] Top N Key Operator [TNK_204] (rows=72 width=353) keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), (_col3 / _col4), (_col5 / _col6),top n:100 diff --git ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out index 
5310297..98249ac 100644 --- ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/constraints/query99.q.out @@ -96,24 +96,24 @@ Stage-1 Reducer 7 vectorized File Output Operator [FS_125] - Limit [LIM_124] (rows=100 width=590) + Limit [LIM_124] (rows=100 width=420) Number of rows:100 - Select Operator [SEL_123] (rows=3920468 width=590) + Select Operator [SEL_123] (rows=3920468 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_122] - Select Operator [SEL_121] (rows=3920468 width=590) + Select Operator [SEL_121] (rows=3920468 width=420) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_120] (rows=3920468 width=406) + Group By Operator [GBY_120] (rows=3920468 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_26] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_25] (rows=7840936 width=406) + Group By Operator [GBY_25] (rows=7840936 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col11, _col15 - Top N Key Operator [TNK_56] (rows=15681873 width=386) + Top N Key Operator [TNK_56] (rows=15681873 width=301) keys:_col13, _col11, _col15,top n:100 - Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=386) + Merge Join Operator [MERGEJOIN_102] (rows=15681873 width=301) Conds:RS_21._col1=RS_119._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] <-Map 12 [SIMPLE_EDGE] vectorized SHUFFLE [RS_119] @@ -125,12 +125,12 @@ <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_21] PartitionCols:_col1 - Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=291) + Merge Join Operator [MERGEJOIN_101] (rows=15681873 width=206) Conds:RS_18._col3=RS_117._col0(Inner),Output:["_col1","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_117] PartitionCols:_col0 - Select Operator [SEL_116] (rows=27 width=188) + Select Operator [SEL_116] (rows=27 width=103) Output:["_col0","_col1"] TableScan [TS_8] (rows=27 width=104) default@warehouse,warehouse,Tbl:COMPLETE,Col:COMPLETE,Output:["w_warehouse_sk","w_warehouse_name"] diff --git ql/src/test/results/clientpositive/perf/tez/query19.q.out ql/src/test/results/clientpositive/perf/tez/query19.q.out index 55ce944..d94c899 100644 --- ql/src/test/results/clientpositive/perf/tez/query19.q.out +++ ql/src/test/results/clientpositive/perf/tez/query19.q.out @@ -96,16 +96,16 @@ PartitionCols:_col0, _col1, _col2, _col3 Group By Operator [GBY_36] (rows=76645658 width=314) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["sum(_col8)"],keys:_col12, _col11, _col13, _col14 - Select Operator [SEL_35] (rows=76645658 width=650) + Select Operator [SEL_35] (rows=76645658 width=458) Output:["_col8","_col11","_col12","_col13","_col14"] - Filter Operator [FIL_34] (rows=76645658 width=650) + Filter Operator [FIL_34] (rows=76645658 width=458) predicate:(_col3 <> _col16) - Merge Join Operator [MERGEJOIN_124] (rows=76645658 width=650) + Merge Join Operator [MERGEJOIN_124] (rows=76645658 width=458) 
Conds:RS_31._col7=RS_147._col0(Inner),Output:["_col3","_col8","_col11","_col12","_col13","_col14","_col16"] <-Map 14 [SIMPLE_EDGE] vectorized SHUFFLE [RS_147] PartitionCols:_col0 - Select Operator [SEL_146] (rows=1704 width=188) + Select Operator [SEL_146] (rows=1704 width=92) Output:["_col0","_col1"] Filter Operator [FIL_145] (rows=1704 width=93) predicate:s_store_sk is not null @@ -114,7 +114,7 @@ <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_31] PartitionCols:_col7 - Merge Join Operator [MERGEJOIN_123] (rows=76645658 width=468) + Merge Join Operator [MERGEJOIN_123] (rows=76645658 width=372) Conds:RS_28._col0=RS_29._col2(Inner),Output:["_col3","_col7","_col8","_col11","_col12","_col13","_col14"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] @@ -167,7 +167,7 @@ <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Merge Join Operator [MERGEJOIN_120] (rows=80000000 width=188) + Merge Join Operator [MERGEJOIN_120] (rows=80000000 width=92) Conds:RS_127._col1=RS_130._col0(Inner),Output:["_col0","_col3"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_127] @@ -181,7 +181,7 @@ <-Map 7 [SIMPLE_EDGE] vectorized SHUFFLE [RS_130] PartitionCols:_col0 - Select Operator [SEL_129] (rows=40000000 width=188) + Select Operator [SEL_129] (rows=40000000 width=92) Output:["_col0","_col1"] Filter Operator [FIL_128] (rows=40000000 width=93) predicate:ca_address_sk is not null diff --git ql/src/test/results/clientpositive/perf/tez/query23.q.out ql/src/test/results/clientpositive/perf/tez/query23.q.out index e60051b..edde953 100644 --- ql/src/test/results/clientpositive/perf/tez/query23.q.out +++ ql/src/test/results/clientpositive/perf/tez/query23.q.out @@ -173,25 +173,25 @@ PartitionCols:_col0 Group By Operator [GBY_501] (rows=62562 width=4) Output:["_col0"],keys:_col1 - Select Operator [SEL_500] (rows=183358851 width=290) + Select Operator [SEL_500] (rows=183358851 width=220) Output:["_col1"] - Filter Operator [FIL_499] (rows=183358851 width=290) + Filter Operator [FIL_499] (rows=183358851 width=220) predicate:(_col3 > 4L) - Select Operator [SEL_498] (rows=550076554 width=290) + Select Operator [SEL_498] (rows=550076554 width=220) Output:["_col1","_col3"] - Group By Operator [GBY_497] (rows=550076554 width=290) + Group By Operator [GBY_497] (rows=550076554 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["count(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 15 [SIMPLE_EDGE] SHUFFLE [RS_23] PartitionCols:_col0 - Group By Operator [GBY_22] (rows=550076554 width=290) + Group By Operator [GBY_22] (rows=550076554 width=220) Output:["_col0","_col1","_col2","_col3"],aggregations:["count()"],keys:_col4, _col3, _col5 - Merge Join Operator [MERGEJOIN_442] (rows=550076554 width=282) + Merge Join Operator [MERGEJOIN_442] (rows=550076554 width=212) Conds:RS_18._col1=RS_496._col0(Inner),Output:["_col3","_col4","_col5"] <-Map 19 [SIMPLE_EDGE] vectorized SHUFFLE [RS_496] PartitionCols:_col0 - Select Operator [SEL_495] (rows=462000 width=188) + Select Operator [SEL_495] (rows=462000 width=118) Output:["_col0","_col1"] Filter Operator [FIL_494] (rows=462000 width=188) predicate:i_item_sk is not null diff --git ql/src/test/results/clientpositive/perf/tez/query79.q.out ql/src/test/results/clientpositive/perf/tez/query79.q.out index f7c8e2f..83b29ee 100644 --- ql/src/test/results/clientpositive/perf/tez/query79.q.out +++ ql/src/test/results/clientpositive/perf/tez/query79.q.out @@ -72,17 +72,17 @@ Stage-1 Reducer 3 vectorized File Output Operator [FS_127] - Limit [LIM_126] (rows=100 width=776) + Limit 
[LIM_126] (rows=100 width=592) Number of rows:100 - Select Operator [SEL_125] (rows=479121995 width=776) + Select Operator [SEL_125] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_33] - Select Operator [SEL_32] (rows=479121995 width=776) + Select Operator [SEL_32] (rows=479121995 width=592) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] - Top N Key Operator [TNK_57] (rows=479121995 width=685) + Top N Key Operator [TNK_57] (rows=479121995 width=593) keys:_col2, _col1, substr(_col5, 1, 30), _col7,top n:100 - Merge Join Operator [MERGEJOIN_101] (rows=479121995 width=685) + Merge Join Operator [MERGEJOIN_101] (rows=479121995 width=593) Conds:RS_104._col0=RS_124._col1(Inner),Output:["_col1","_col2","_col3","_col5","_col6","_col7","_col8"] <-Map 1 [SIMPLE_EDGE] vectorized SHUFFLE [RS_104] @@ -96,7 +96,7 @@ <-Reducer 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_124] PartitionCols:_col1 - Select Operator [SEL_123] (rows=479121995 width=508) + Select Operator [SEL_123] (rows=479121995 width=416) Output:["_col0","_col1","_col2","_col3","_col4","_col5"] Group By Operator [GBY_122] (rows=479121995 width=328) Output:["_col0","_col1","_col2","_col3","_col4","_col5"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)"],keys:KEY._col0, KEY._col1, KEY._col2, KEY._col3 diff --git ql/src/test/results/clientpositive/perf/tez/query8.q.out ql/src/test/results/clientpositive/perf/tez/query8.q.out index c72498b..27420bd 100644 --- ql/src/test/results/clientpositive/perf/tez/query8.q.out +++ ql/src/test/results/clientpositive/perf/tez/query8.q.out @@ -271,7 +271,7 @@ <-Map 18 [SIMPLE_EDGE] vectorized SHUFFLE [RS_149] PartitionCols:_col2 - Select Operator [SEL_148] (rows=1704 width=276) + Select Operator [SEL_148] (rows=1704 width=178) Output:["_col0","_col1","_col2"] Filter Operator [FIL_147] (rows=1704 width=181) predicate:(s_store_sk is not null and substr(s_zip, 1, 2) is not null) @@ -280,24 +280,24 @@ <-Reducer 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_146] PartitionCols:_col0 - Select Operator [SEL_145] (rows=1 width=184) + Select Operator [SEL_145] (rows=1 width=86) Output:["_col0"] - Filter Operator [FIL_144] (rows=1 width=192) + Filter Operator [FIL_144] (rows=1 width=96) predicate:(_col1 = 2L) - Group By Operator [GBY_143] (rows=5633 width=192) + Group By Operator [GBY_143] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Union 10 [SIMPLE_EDGE] <-Reducer 16 [CONTAINS] vectorized Reduce Output Operator [RS_175] PartitionCols:_col0 - Group By Operator [GBY_174] (rows=5633 width=192) + Group By Operator [GBY_174] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_173] (rows=1126 width=192) + Group By Operator [GBY_173] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Reducer 15 [SIMPLE_EDGE] vectorized SHUFFLE [RS_172] PartitionCols:_col0 - Group By Operator [GBY_171] (rows=1126 width=192) + Group By Operator [GBY_171] (rows=1126 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_170] (rows=2253 width=97) Output:["_col0"] @@ -333,14 +333,14 @@ <-Reducer 9 [CONTAINS] vectorized Reduce Output Operator [RS_161] PartitionCols:_col0 - Group By Operator [GBY_160] (rows=5633 width=192) + Group By Operator [GBY_160] (rows=5633 width=96) Output:["_col0","_col1"],aggregations:["count(_col1)"],keys:_col0 - Group By Operator [GBY_159] 
(rows=10141 width=192) + Group By Operator [GBY_159] (rows=10141 width=96) Output:["_col0","_col1"],aggregations:["count(VALUE._col0)"],keys:KEY._col0 <-Map 8 [SIMPLE_EDGE] vectorized SHUFFLE [RS_158] PartitionCols:_col0 - Group By Operator [GBY_157] (rows=141974 width=192) + Group By Operator [GBY_157] (rows=141974 width=96) Output:["_col0","_col1"],aggregations:["count()"],keys:_col0 Select Operator [SEL_156] (rows=20000000 width=89) Output:["_col0"] diff --git ql/src/test/results/clientpositive/perf/tez/query85.q.out ql/src/test/results/clientpositive/perf/tez/query85.q.out index 6e1a562..94ec2f9 100644 --- ql/src/test/results/clientpositive/perf/tez/query85.q.out +++ ql/src/test/results/clientpositive/perf/tez/query85.q.out @@ -201,13 +201,13 @@ Stage-1 Reducer 6 vectorized File Output Operator [FS_239] - Limit [LIM_238] (rows=72 width=832) + Limit [LIM_238] (rows=72 width=656) Number of rows:100 - Select Operator [SEL_237] (rows=72 width=832) + Select Operator [SEL_237] (rows=72 width=656) Output:["_col0","_col1","_col2","_col3"] <-Reducer 5 [SIMPLE_EDGE] vectorized SHUFFLE [RS_236] - Select Operator [SEL_235] (rows=72 width=832) + Select Operator [SEL_235] (rows=72 width=656) Output:["_col4","_col5","_col6","_col7"] Top N Key Operator [TNK_234] (rows=72 width=353) keys:substr(_col0, 1, 20), (UDFToDouble(_col1) / _col2), (_col3 / _col4), (_col5 / _col6),top n:100 diff --git ql/src/test/results/clientpositive/perf/tez/query99.q.out ql/src/test/results/clientpositive/perf/tez/query99.q.out index d24d5cc..87e2713 100644 --- ql/src/test/results/clientpositive/perf/tez/query99.q.out +++ ql/src/test/results/clientpositive/perf/tez/query99.q.out @@ -96,24 +96,24 @@ Stage-1 Reducer 7 vectorized File Output Operator [FS_131] - Limit [LIM_130] (rows=100 width=590) + Limit [LIM_130] (rows=100 width=420) Number of rows:100 - Select Operator [SEL_129] (rows=3920468 width=590) + Select Operator [SEL_129] (rows=3920468 width=420) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"] <-Reducer 6 [SIMPLE_EDGE] vectorized SHUFFLE [RS_128] - Select Operator [SEL_127] (rows=3920468 width=590) + Select Operator [SEL_127] (rows=3920468 width=420) Output:["_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8"] - Group By Operator [GBY_126] (rows=3920468 width=406) + Group By Operator [GBY_126] (rows=3920468 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(VALUE._col0)","sum(VALUE._col1)","sum(VALUE._col2)","sum(VALUE._col3)","sum(VALUE._col4)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Reducer 5 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_28] (rows=7840936 width=406) + Group By Operator [GBY_28] (rows=7840936 width=321) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7"],aggregations:["sum(_col4)","sum(_col5)","sum(_col6)","sum(_col7)","sum(_col8)"],keys:_col13, _col15, _col11 - Top N Key Operator [TNK_59] (rows=15681873 width=386) + Top N Key Operator [TNK_59] (rows=15681873 width=301) keys:_col13, _col15, _col11,top n:100 - Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=386) + Merge Join Operator [MERGEJOIN_105] (rows=15681873 width=301) Conds:RS_24._col2=RS_108._col0(Inner),Output:["_col4","_col5","_col6","_col7","_col8","_col11","_col13","_col15"] <-Map 11 [SIMPLE_EDGE] vectorized SHUFFLE [RS_108] @@ -127,12 +127,12 @@ <-Reducer 4 [SIMPLE_EDGE] SHUFFLE [RS_24] PartitionCols:_col2 - Merge Join Operator [MERGEJOIN_104] (rows=282273729 width=305) + Merge 
Join Operator [MERGEJOIN_104] (rows=282273729 width=220) Conds:RS_21._col3=RS_125._col0(Inner),Output:["_col2","_col4","_col5","_col6","_col7","_col8","_col11","_col13"] <-Map 10 [SIMPLE_EDGE] vectorized SHUFFLE [RS_125] PartitionCols:_col0 - Select Operator [SEL_124] (rows=27 width=188) + Select Operator [SEL_124] (rows=27 width=103) Output:["_col0","_col1"] Filter Operator [FIL_123] (rows=27 width=104) predicate:w_warehouse_sk is not null diff --git ql/src/test/results/clientpositive/vector_case_when_1.q.out ql/src/test/results/clientpositive/vector_case_when_1.q.out index d6c8a3d..f3866d6 100644 --- ql/src/test/results/clientpositive/vector_case_when_1.q.out +++ ql/src/test/results/clientpositive/vector_case_when_1.q.out @@ -213,13 +213,13 @@ native: true projectedOutputColumnNums: [4, 21, 26, 30, 34, 38, 42, 44, 46, 48, 50, 52, 54, 58, 61, 64, 67] selectExpressions: VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE ('Huge number') END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') WHEN ((l_quantity < 100)) THEN ('Many') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 22:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 23:boolean, LongColLessLongScalar(col 4:int, val 10) -> 24:boolean, LongColLessLongScalar(col 4:int, val 100) -> 25:boolean) -> 26:string, VectorUDFAdaptor(CASE WHEN ((l_quantity = 1)) THEN ('Single') WHEN ((l_quantity = 2)) THEN ('Two') WHEN ((l_quantity < 10)) THEN ('Some') ELSE (null) END)(children: LongColEqualLongScalar(col 4:int, val 1) -> 27:boolean, LongColEqualLongScalar(col 4:int, val 2) -> 28:boolean, LongColLessLongScalar(col 4:int, val 10) -> 29:boolean) -> 30:string, IfExprLongColumnLongColumn(col 31:boolean, col 32:date, col 33:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 31:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 32:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 33:date) -> 34:date, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 35:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 36:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 36:double) -> 37:double) -> 38:double, VectorUDFAdaptor(CASE WHEN ((l_returnflag = 'N')) THEN ((l_extendedprice * (1.0D - l_discount))) ELSE (0.0D) END)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 39:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 40:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 40:double) -> 41:double) -> 42:double, VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), null, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 43:boolean) -> 44:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, null))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 45:boolean) -> 46:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 
'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 47:boolean) -> 48:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 49:boolean) -> 50:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 51:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 51:boolean) -> 52:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 53:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 53:boolean) -> 54:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 55:boolean, col 56:timestampcol 57:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 55:boolean, CastDateToTimestamp(col 12:date) -> 56:timestamp, CastDateToTimestamp(col 11:date) -> 57:timestamp) -> 58:timestamp, VectorUDFAdaptor(if((l_suppkey > 10000), datediff(l_receiptdate, l_commitdate), null))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 59:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 60:int) -> 61:int, VectorUDFAdaptor(if((l_suppkey > 10000), null, datediff(l_receiptdate, l_commitdate)))(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 62:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 63:int) -> 64:int, IfExprLongScalarLongScalar(col 66:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 65:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 65:int) -> 66:boolean) -> 67:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -546,13 +546,13 @@ native: true projectedOutputColumnNums: [4, 24, 33, 40, 44, 48, 52, 54, 56, 58, 60, 62, 64, 68, 71, 74, 77] selectExpressions: IfExprStringScalarStringGroupColumn(col 17:boolean, val Singlecol 23:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, IfExprStringScalarStringGroupColumn(col 18:boolean, val Twocol 22:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 18:boolean, IfExprStringScalarStringGroupColumn(col 19:boolean, val Somecol 21:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 19:boolean, IfExprStringScalarStringScalar(col 20:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 20:boolean) -> 21:string) -> 22:string) -> 23:string) -> 24:string, IfExprStringScalarStringGroupColumn(col 25:boolean, val Singlecol 32:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 25:boolean, IfExprStringScalarStringGroupColumn(col 26:boolean, val Twocol 31:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 26:boolean, IfExprStringScalarStringGroupColumn(col 27:boolean, val Somecol 30:string)(children: 
LongColLessLongScalar(col 4:int, val 10) -> 27:boolean, IfExprColumnNull(col 28:boolean, col 29:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 28:boolean, ConstantVectorExpression(val Many) -> 29:string) -> 30:string) -> 31:string) -> 32:string) -> 33:string, IfExprStringScalarStringGroupColumn(col 34:boolean, val Singlecol 39:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 34:boolean, IfExprStringScalarStringGroupColumn(col 35:boolean, val Twocol 38:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 35:boolean, IfExprColumnNull(col 36:boolean, col 37:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 36:boolean, ConstantVectorExpression(val Some) -> 37:string) -> 38:string) -> 39:string) -> 40:string, IfExprLongColumnLongColumn(col 41:boolean, col 42:date, col 43:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 41:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 42:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 43:date) -> 44:date, IfExprDoubleColumnDoubleScalar(col 45:boolean, col 47:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 45:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 46:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 46:double) -> 47:double) -> 48:double, IfExprDoubleColumnDoubleScalar(col 49:boolean, col 51:double, val 0.0)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 49:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 50:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 50:double) -> 51:double) -> 52:double, IfExprNullColumn(col 53:boolean, null, col 78)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 53:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 78:decimal(10,2)) -> 54:decimal(10,2), IfExprColumnNull(col 55:boolean, col 79:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 55:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 79:decimal(10,2)) -> 56:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 57:boolean) -> 58:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 59:boolean) -> 60:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 61:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 61:boolean) -> 62:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 63:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 63:boolean) -> 64:decimal(10,2)/DECIMAL_64, IfExprTimestampColumnColumn(col 65:boolean, col 66:timestampcol 67:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 65:boolean, CastDateToTimestamp(col 12:date) -> 66:timestamp, CastDateToTimestamp(col 11:date) -> 67:timestamp) -> 68:timestamp, IfExprColumnNull(col 69:boolean, col 70:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 69:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 70:int) -> 71:int, IfExprNullColumn(col 
72:boolean, null, col 73)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 72:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 73:int) -> 74:int, IfExprLongScalarLongScalar(col 76:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 75:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 75:int) -> 76:boolean) -> 77:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -879,13 +879,13 @@ native: true projectedOutputColumnNums: [4, 27, 39, 48, 52, 57, 62, 64, 66, 71, 76, 78, 80, 84, 87, 90, 93] selectExpressions: IfExprColumnCondExpr(col 17:boolean, col 18:stringcol 26:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 17:boolean, ConstantVectorExpression(val Single) -> 18:string, IfExprColumnCondExpr(col 19:boolean, col 20:stringcol 25:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 19:boolean, ConstantVectorExpression(val Two) -> 20:string, IfExprColumnCondExpr(col 21:boolean, col 22:stringcol 24:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 21:boolean, ConstantVectorExpression(val Some) -> 22:string, IfExprStringScalarStringScalar(col 23:boolean, val Many, val Huge number)(children: LongColLessLongScalar(col 4:int, val 100) -> 23:boolean) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 38:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 28:boolean, ConstantVectorExpression(val Single) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 37:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 30:boolean, ConstantVectorExpression(val Two) -> 31:string, IfExprColumnCondExpr(col 32:boolean, col 33:stringcol 36:string)(children: LongColLessLongScalar(col 4:int, val 10) -> 32:boolean, ConstantVectorExpression(val Some) -> 33:string, IfExprColumnNull(col 34:boolean, col 35:string, null)(children: LongColLessLongScalar(col 4:int, val 100) -> 34:boolean, ConstantVectorExpression(val Many) -> 35:string) -> 36:string) -> 37:string) -> 38:string) -> 39:string, IfExprColumnCondExpr(col 40:boolean, col 41:stringcol 47:string)(children: LongColEqualLongScalar(col 4:int, val 1) -> 40:boolean, ConstantVectorExpression(val Single) -> 41:string, IfExprColumnCondExpr(col 42:boolean, col 43:stringcol 46:string)(children: LongColEqualLongScalar(col 4:int, val 2) -> 42:boolean, ConstantVectorExpression(val Two) -> 43:string, IfExprColumnNull(col 44:boolean, col 45:string, null)(children: LongColLessLongScalar(col 4:int, val 10) -> 44:boolean, ConstantVectorExpression(val Some) -> 45:string) -> 46:string) -> 47:string) -> 48:string, IfExprCondExprCondExpr(col 49:boolean, col 50:datecol 51:date)(children: StringGroupColEqualCharScalar(col 14:char(10), val SHIP) -> 49:boolean, VectorUDFDateAddColScalar(col 10:date, val 10) -> 50:date, VectorUDFDateAddColScalar(col 10:date, val 5) -> 51:date) -> 52:date, IfExprCondExprColumn(col 53:boolean, col 55:double, 
col 56:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 53:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 54:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 54:double) -> 55:double, ConstantVectorExpression(val 0.0) -> 56:double) -> 57:double, IfExprCondExprColumn(col 58:boolean, col 60:double, col 61:double)(children: StringGroupColEqualCharScalar(col 8:char(1), val N) -> 58:boolean, DoubleColMultiplyDoubleColumn(col 5:double, col 59:double)(children: DoubleScalarSubtractDoubleColumn(val 1.0, col 6:double) -> 59:double) -> 60:double, ConstantVectorExpression(val 0.0) -> 61:double) -> 62:double, IfExprNullColumn(col 63:boolean, null, col 94)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 63:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 94:decimal(10,2)) -> 64:decimal(10,2), IfExprColumnNull(col 65:boolean, col 95:decimal(10,2), null)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 65:boolean, ConvertDecimal64ToDecimal(col 7:decimal(10,2)/DECIMAL_64) -> 95:decimal(10,2)) -> 66:decimal(10,2), VectorUDFAdaptor(if((l_shipinstruct = 'DELIVER IN PERSON'), 0, l_tax))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 70:boolean) -> 71:decimal(12,2), VectorUDFAdaptor(if((l_shipinstruct = 'TAKE BACK RETURN'), l_tax, 0))(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 75:boolean) -> 76:decimal(12,2), IfExprDecimal64ScalarDecimal64Column(col 77:boolean, decimal64Val 0, decimalVal 0, col 7:decimal(10,2)/DECIMAL_64)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val DELIVER IN PERSON) -> 77:boolean) -> 78:decimal(10,2)/DECIMAL_64, IfExprDecimal64ColumnDecimal64Scalar(col 79:boolean, col 7:decimal(10,2)/DECIMAL_64, decimal64Val 0, decimalVal 0)(children: StringGroupColEqualVarCharScalar(col 13:varchar(20), val TAKE BACK RETURN) -> 79:boolean) -> 80:decimal(10,2)/DECIMAL_64, IfExprCondExprCondExpr(col 81:boolean, col 82:timestampcol 83:timestamp)(children: LongColGreaterLongScalar(col 1:int, val 30) -> 81:boolean, CastDateToTimestamp(col 12:date) -> 82:timestamp, CastDateToTimestamp(col 11:date) -> 83:timestamp) -> 84:timestamp, IfExprCondExprNull(col 85:boolean, col 86:int, null)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 85:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 86:int) -> 87:int, IfExprNullCondExpr(col 88:boolean, null, col 89:int)(children: LongColGreaterLongScalar(col 2:int, val 10000) -> 88:boolean, VectorUDFDateDiffColCol(col 12:date, col 11:date) -> 89:int) -> 90:int, IfExprLongScalarLongScalar(col 92:boolean, val 14245, val 14609)(children: LongColGreaterLongScalar(col 91:int, val 100)(children: LongColModuloLongScalar(col 2:int, val 500) -> 91:int) -> 92:boolean) -> 93:date - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 101 Data size: 141804 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 101 Data size: 118604 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
ql/src/test/results/clientpositive/vector_case_when_2.q.out ql/src/test/results/clientpositive/vector_case_when_2.q.out index 288e6f0..a7b46fd 100644 --- ql/src/test/results/clientpositive/vector_case_when_2.q.out +++ ql/src/test/results/clientpositive/vector_case_when_2.q.out @@ -145,7 +145,7 @@ native: true projectedOutputColumnNums: [1, 3, 9, 14, 18, 22, 24, 27, 32, 38, 2] selectExpressions: VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'1800-12-31 00:00:00')) THEN ('1800s or Earlier') WHEN ((ctimestamp2 < TIMESTAMP'1900-01-01 00:00:00')) THEN ('1900s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE ('Unknown') END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 7:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') WHEN ((ctimestamp2 <= TIMESTAMP'2015-12-31 23:59:59.999999999')) THEN ('Early 2010s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 10:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 11:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 12:boolean, TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 13:boolean) -> 14:string, VectorUDFAdaptor(CASE WHEN ((ctimestamp2 <= TIMESTAMP'2000-12-31 23:59:59.999999999')) THEN ('Old') WHEN ((ctimestamp2 < TIMESTAMP'2006-01-01 00:00:00')) THEN ('Early 2000s') WHEN (ctimestamp2 BETWEEN TIMESTAMP'2006-01-01 00:00:00' AND TIMESTAMP'2010-12-31 23:59:59.999999999') THEN ('Late 2000s') ELSE (null) END)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 15:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 16:boolean, TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 17:boolean) -> 18:string, IfExprLongColumnLongColumn(col 19:boolean, col 20:int, col 21:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 19:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 20:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 21:int) -> 22:int, VectorUDFAdaptor(CASE WHEN ((stimestamp1 like '%19%')) THEN (stimestamp1) ELSE ('2018-03-08 23:04:59') END)(children: SelectStringColLikeStringScalar(col 2:string) -> 23:boolean) -> 24:string, VectorUDFAdaptor(if((ctimestamp1 = TIMESTAMP'2021-09-24 03:18:32.413655165'), null, minute(ctimestamp1)))(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 25:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 26:int) -> 27:int, VectorUDFAdaptor(if(((ctimestamp2 >= 
TIMESTAMP'5344-10-04 18:40:08.165') and (ctimestamp2 < TIMESTAMP'6631-11-13 16:31:29.702202248')), minute(ctimestamp1), null))(children: ColAndCol(col 28:boolean, col 29:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 28:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 29:boolean) -> 30:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 31:int) -> 32:int, IfExprLongColumnLongColumn(col 35:boolean, col 36:date, col 37:date)(children: DoubleColGreaterDoubleScalar(col 34:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 33:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 33:double) -> 34:double) -> 35:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 36:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 37:date) -> 38:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp) null sort order: zzz @@ -155,7 +155,7 @@ native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date) Execution mode: vectorized Map Vectorization: @@ -181,10 +181,10 @@ Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -409,7 +409,7 @@ native: true projectedOutputColumnNums: [1, 3, 12, 21, 28, 32, 34, 37, 42, 48, 2] selectExpressions: IfExprStringScalarStringGroupColumn(col 5:boolean, val 1800s or Earliercol 11:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, IfExprStringScalarStringGroupColumn(col 6:boolean, val 1900scol 10:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 6:boolean, IfExprStringScalarStringGroupColumn(col 7:boolean, val Late 2000scol 9:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 
15:59:59.999999999) -> 7:boolean, IfExprStringScalarStringScalar(col 8:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 8:boolean) -> 9:string) -> 10:string) -> 11:string) -> 12:string, IfExprStringScalarStringGroupColumn(col 13:boolean, val Oldcol 20:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 13:boolean, IfExprStringScalarStringGroupColumn(col 14:boolean, val Early 2000scol 19:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 14:boolean, IfExprStringScalarStringGroupColumn(col 15:boolean, val Late 2000scol 18:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 15:boolean, IfExprColumnNull(col 16:boolean, col 17:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Early 2010s) -> 17:string) -> 18:string) -> 19:string) -> 20:string) -> 21:string, IfExprStringScalarStringGroupColumn(col 22:boolean, val Oldcol 27:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 22:boolean, IfExprStringScalarStringGroupColumn(col 23:boolean, val Early 2000scol 26:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 23:boolean, IfExprColumnNull(col 24:boolean, col 25:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 24:boolean, ConstantVectorExpression(val Late 2000s) -> 25:string) -> 26:string) -> 27:string) -> 28:string, IfExprLongColumnLongColumn(col 29:boolean, col 30:int, col 31:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 29:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 30:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 31:int) -> 32:int, IfExprStringGroupColumnStringScalar(col 33:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 33:boolean) -> 34:string, IfExprNullColumn(col 35:boolean, null, col 36)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 35:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 36:int) -> 37:int, IfExprColumnNull(col 40:boolean, col 41:int, null)(children: ColAndCol(col 38:boolean, col 39:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 38:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 39:boolean) -> 40:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 41:int) -> 42:int, IfExprLongColumnLongColumn(col 45:boolean, col 46:date, col 47:date)(children: DoubleColGreaterDoubleScalar(col 44:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 43:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 43:double) -> 44:double) -> 45:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 46:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 47:date) -> 48:date - Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 
(type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -419,7 +419,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized
Map Vectorization:
@@ -445,10 +445,10 @@
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -673,7 +673,7 @@
native: true
projectedOutputColumnNums: [1, 3, 15, 27, 36, 40, 42, 45, 50, 56, 2]
selectExpressions: IfExprColumnCondExpr(col 5:boolean, col 6:stringcol 14:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 1800-12-31 00:00:00) -> 5:boolean, ConstantVectorExpression(val 1800s or Earlier) -> 6:string, IfExprColumnCondExpr(col 7:boolean, col 8:stringcol 13:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 1900-01-01 00:00:00) -> 7:boolean, ConstantVectorExpression(val 1900s) -> 8:string, IfExprColumnCondExpr(col 9:boolean, col 10:stringcol 12:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 9:boolean, ConstantVectorExpression(val Late 2000s) -> 10:string, IfExprStringScalarStringScalar(col 11:boolean, val Early 2010s, val Unknown)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 11:boolean) -> 12:string) -> 13:string) -> 14:string) -> 15:string, IfExprColumnCondExpr(col 16:boolean, col 17:stringcol 26:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 16:boolean, ConstantVectorExpression(val Old) -> 17:string, IfExprColumnCondExpr(col 18:boolean, col 19:stringcol 25:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 18:boolean, ConstantVectorExpression(val Early 2000s) -> 19:string, IfExprColumnCondExpr(col 20:boolean, col 21:stringcol 24:string)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 20:boolean, ConstantVectorExpression(val Late 2000s) -> 21:string, IfExprColumnNull(col 22:boolean, col 23:string, null)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2015-12-31 23:59:59.999999999) -> 22:boolean, ConstantVectorExpression(val Early 2010s) -> 23:string) -> 24:string) -> 25:string) -> 26:string) -> 27:string, IfExprColumnCondExpr(col 28:boolean, col 29:stringcol 35:string)(children: TimestampColLessEqualTimestampScalar(col 3:timestamp, val 2000-12-31 23:59:59.999999999) -> 28:boolean, ConstantVectorExpression(val Old) -> 29:string, IfExprColumnCondExpr(col 30:boolean, col 31:stringcol 34:string)(children: TimestampColLessTimestampScalar(col 3:timestamp, val 2006-01-01 00:00:00) -> 30:boolean, ConstantVectorExpression(val Early 2000s) -> 31:string, IfExprColumnNull(col 32:boolean, col 33:string, null)(children: TimestampColumnBetween(col 3:timestamp, left 2005-12-31 16:00:00.0, right 2010-12-31 15:59:59.999999999) -> 32:boolean, ConstantVectorExpression(val Late 2000s) -> 33:string) -> 34:string) -> 35:string) -> 36:string, IfExprCondExprCondExpr(col 37:boolean, col 38:intcol 39:int)(children: TimestampColLessTimestampScalar(col 1:timestamp, val 1974-10-04 17:21:03.989) -> 37:boolean, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 38:int, VectorUDFYearTimestamp(col 3:timestamp, field YEAR) -> 39:int) -> 40:int, IfExprStringGroupColumnStringScalar(col 41:boolean, col 2:string, val 2018-03-08 23:04:59)(children: SelectStringColLikeStringScalar(col 2:string) -> 41:boolean) -> 42:string, IfExprNullCondExpr(col 43:boolean, null, col 44:int)(children: TimestampColEqualTimestampScalar(col 1:timestamp, val 2021-09-24 03:18:32.413655165) -> 43:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 44:int) -> 45:int, IfExprCondExprNull(col 48:boolean, col 49:int, null)(children: ColAndCol(col 46:boolean, col 47:boolean)(children: TimestampColGreaterEqualTimestampScalar(col 3:timestamp, val 5344-10-04 18:40:08.165) -> 46:boolean, TimestampColLessTimestampScalar(col 3:timestamp, val 6631-11-13 16:31:29.702202248) -> 47:boolean) -> 48:boolean, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 49:int) -> 50:int, IfExprCondExprCondExpr(col 53:boolean, col 54:datecol 55:date)(children: DoubleColGreaterDoubleScalar(col 52:double, val 100.0)(children: DoubleColModuloDoubleScalar(col 51:double, val 500.0)(children: CastTimestampToDouble(col 1:timestamp) -> 51:double) -> 52:double) -> 53:boolean, VectorUDFDateAddColScalar(col 0:date, val 1) -> 54:date, VectorUDFDateAddColScalar(col 0:date, val 365) -> 55:date) -> 56:date
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: timestamp), _col10 (type: string), _col1 (type: timestamp)
null sort order: zzz
@@ -683,7 +683,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 51 Data size: 50745 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 50345 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col2 (type: string), _col3 (type: string), _col4 (type: string), _col5 (type: int), _col6 (type: string), _col7 (type: int), _col8 (type: int), _col9 (type: date)
Execution mode: vectorized
Map Vectorization:
@@ -709,10 +709,10 @@
Select Operator
expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey2 (type: timestamp), VALUE._col0 (type: string), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: int), VALUE._col4 (type: string), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: date)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 51 Data size: 45084 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 51 Data size: 44684 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vector_groupby4.q.out ql/src/test/results/clientpositive/vector_groupby4.q.out
index 925b96c..8ea182a 100644
--- ql/src/test/results/clientpositive/vector_groupby4.q.out
+++ ql/src/test/results/clientpositive/vector_groupby4.q.out
@@ -90,7 +90,7 @@
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -114,7 +114,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -134,10 +134,10 @@
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -146,7 +146,7 @@
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -187,7 +187,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 316 Data size: 58144 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 316 Data size: 26860 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Map Vectorization:
diff --git ql/src/test/results/clientpositive/vector_groupby6.q.out ql/src/test/results/clientpositive/vector_groupby6.q.out
index b478656..2cba267 100644
--- ql/src/test/results/clientpositive/vector_groupby6.q.out
+++ ql/src/test/results/clientpositive/vector_groupby6.q.out
@@ -90,7 +90,7 @@
keys: KEY._col0 (type: string)
mode: partial1
outputColumnNames: _col0
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -114,7 +114,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 500 Data size: 92000 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 500 Data size: 42500 Basic stats: COMPLETE Column stats: COMPLETE
Execution mode: vectorized
Map Vectorization:
enabled: true
@@ -134,10 +134,10 @@
keys: KEY._col0 (type: string)
mode: final
outputColumnNames: _col0
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
output format: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
@@ -146,7 +146,7 @@
Select Operator
expressions: _col0 (type: string)
outputColumnNames: c1
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
table:
@@ -187,7 +187,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 307 Data size: 56488 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 307 Data size: 26095 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: c1 (type: string)
Execution mode: vectorized
Map Vectorization:
diff --git ql/src/test/results/clientpositive/vector_if_expr.q.out ql/src/test/results/clientpositive/vector_if_expr.q.out
index 58c2e1e..292e8c6 100644
--- ql/src/test/results/clientpositive/vector_if_expr.q.out
+++ ql/src/test/results/clientpositive/vector_if_expr.q.out
@@ -41,7 +41,7 @@
native: true
projectedOutputColumnNums: [10, 13]
selectExpressions: IfExprStringScalarStringScalar(col 10:boolean, val first, val second) -> 13:string
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: boolean)
null sort order: z
@@ -51,7 +51,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: string)
Execution mode: vectorized
Map Vectorization:
@@ -71,10 +71,10 @@
Select Operator
expressions: KEY.reducesinkkey0 (type: boolean), VALUE._col0 (type: string)
outputColumnNames: _col0, _col1
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 4587 Data size: 857712 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 4587 Data size: 426534 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vector_nvl.q.out ql/src/test/results/clientpositive/vector_nvl.q.out
index 43ca0ec..26bae3f 100644
--- ql/src/test/results/clientpositive/vector_nvl.q.out
+++ ql/src/test/results/clientpositive/vector_nvl.q.out
@@ -140,19 +140,19 @@
native: true
projectedOutputColumnNums: [4, 14]
selectExpressions: VectorCoalesce(columns [4, 13])(children: col 4:float, ConstantVectorExpression(val 1.0) -> 13:float) -> 14:float
- Statistics: Num rows: 12288 Data size: 85848 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 12288 Data size: 73392 Basic stats: COMPLETE Column stats: COMPLETE
Limit
Number of rows: 10
Limit Vectorization:
className: VectorLimitOperator
native: true
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 10 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 10 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vectorization_multi_value.q.out ql/src/test/results/clientpositive/vectorization_multi_value.q.out
index 2fec50d..b4507fb 100644
--- ql/src/test/results/clientpositive/vectorization_multi_value.q.out
+++ ql/src/test/results/clientpositive/vectorization_multi_value.q.out
@@ -64,13 +64,13 @@
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':'b')) -> 3:map) -> 4:map
- Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 326 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 978 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 326 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -143,13 +143,13 @@
native: true
projectedOutputColumnNums: [5]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 4:map>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':'c')))(children: VectorUDFAdaptor(map('b':'c')) -> 3:map) -> 4:map>) -> 5:map>
- Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 241 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 723 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 241 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -222,13 +222,13 @@
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:map, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':a)) -> 3:map) -> 4:map
- Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 2760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 920 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -301,13 +301,13 @@
native: true
projectedOutputColumnNums: [5]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 4:map>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(map('a':map('b':a)))(children: VectorUDFAdaptor(map('b':a)) -> 3:map) -> 4:map>) -> 5:map>
- Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 4968 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1656 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -380,13 +380,13 @@
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a','b')) -> 3:array) -> 4:array
- Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 1224 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -459,13 +459,13 @@
native: true
projectedOutputColumnNums: [6]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 5:array>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array(array('a','b'),array('c','d')))(children: VectorUDFAdaptor(array('a','b')) -> 3:array, VectorUDFAdaptor(array('c','d')) -> 4:array) -> 5:array>) -> 6:array>
- Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -538,13 +538,13 @@
native: true
projectedOutputColumnNums: [4]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 3:array, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array('a',a)) -> 3:array) -> 4:array
- Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 5760 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 1920 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
@@ -617,13 +617,13 @@
native: true
projectedOutputColumnNums: [6]
selectExpressions: IfExprCondExprNull(col 2:boolean, col 5:array>, null)(children: ConstantVectorExpression(val 1) -> 2:boolean, VectorUDFAdaptor(array(array('a',a),array('b','c')))(children: VectorUDFAdaptor(array('a',a)) -> 3:array, VectorUDFAdaptor(array('b','c')) -> 4:array) -> 5:array>) -> 6:array>
- Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 19280 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
File Sink Vectorization:
className: VectorFileSinkOperator
native: false
- Statistics: Num rows: 3 Data size: 57840 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 3 Data size: 19280 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 907edb6..c94eb90 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -268,7 +268,7 @@
native: true
projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17]
selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
Reduce Output Operator
key expressions: _col0 (type: bigint)
null sort order: z
@@ -278,7 +278,7 @@
native: false
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true
nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp)
Execution mode: vectorized
Map Vectorization:
@@ -298,10 +298,10 @@
Select Operator
expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: boolean), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp)
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
File Output Operator
compressed: false
- Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE
+ Statistics: Num rows: 52 Data size: 11916 Basic stats: COMPLETE Column stats: COMPLETE
table:
input format: org.apache.hadoop.mapred.SequenceFileInputFormat
output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat