diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java index 10a2947b16..526b57acd0 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/ColumnStatsUpdateTask.java @@ -39,12 +39,14 @@ import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.SetPartitionsStatsRequest; +import org.apache.hadoop.hive.metastore.api.Timestamp; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.ql.CompilationOpContext; import org.apache.hadoop.hive.ql.DriverContext; import org.apache.hadoop.hive.ql.QueryPlan; @@ -58,6 +60,7 @@ import org.apache.hadoop.hive.ql.plan.ColumnStatsUpdateWork; import org.apache.hadoop.hive.ql.plan.api.StageType; import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -110,7 +113,7 @@ private ColumnStatistics constructColumnStatsFromInput() if (columnType.equalsIgnoreCase("long") || columnType.equalsIgnoreCase("tinyint") || columnType.equalsIgnoreCase("smallint") || columnType.equalsIgnoreCase("int") - || columnType.equalsIgnoreCase("bigint") || columnType.equalsIgnoreCase("timestamp")) { + || 
columnType.equalsIgnoreCase("bigint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNullsIsSet(false); longStats.setNumDVsIsSet(false); @@ -275,6 +278,26 @@ private ColumnStatistics constructColumnStatsFromInput() } statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); + } else if (columnType.equalsIgnoreCase("timestamp")) { /* Timestamp columns get their own stats object, filled from the property map returned by work.getMapProp(): numNulls and numDVs are parsed as longs, lowValue and highValue go through readTimestampValue, and any other key is rejected with a SemanticException. */ + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + Map mapProp = work.getMapProp(); + for (Entry entry : mapProp.entrySet()) { + String fName = entry.getKey(); + String value = entry.getValue(); + if (fName.equals("numNulls")) { + timestampStats.setNumNulls(Long.parseLong(value)); + } else if (fName.equals("numDVs")) { + timestampStats.setNumDVs(Long.parseLong(value)); + } else if (fName.equals("lowValue")) { + timestampStats.setLowValue(readTimestampValue(value)); + } else if (fName.equals("highValue")) { + timestampStats.setHighValue(readTimestampValue(value)); + } else { + throw new SemanticException("Unknown stat"); + } + } + statsData.setTimestampStats(timestampStats); + statsObj.setStatsData(statsData); } else { throw new SemanticException("Unsupported type"); } @@ -375,4 +398,15 @@ private Date readDateValue(String dateStr) { return new Date(Long.parseLong(dateStr)); } } + + /** Converts a user-supplied bound string into a metastore Timestamp. The string is first parsed with org.apache.hadoop.hive.common.type.Timestamp.valueOf and reduced to seconds through TimestampWritableV2.getSeconds(). If that attempt throws IllegalArgumentException, the string is instead parsed as a long (per the debug message, seconds since epoch). A string that is neither form fails with the NumberFormatException raised by Long.parseLong. */ private Timestamp readTimestampValue(String timestampStr) { + try { + TimestampWritableV2 writableVal = new TimestampWritableV2( + org.apache.hadoop.hive.common.type.Timestamp.valueOf(timestampStr)); + return new Timestamp(writableVal.getSeconds()); + } catch (IllegalArgumentException err) { + LOG.debug("Reading timestamp value as seconds since epoch: {}", timestampStr); + return new Timestamp(Long.parseLong(timestampStr)); + } + } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java index 0b66b93a75..4bbc1771ae 100644 --- 
a/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/formatting/JsonMetaDataFormatter.java @@ -338,6 +338,19 @@ private ColumnStatisticsData getStatistics(FieldSchema column, List neededCols, Operator op) - throws SemanticException { + AnnotateStatsProcCtx aspCtx, List neededCols, Operator op) throws SemanticException { long numRows = currNumRows; @@ -1045,8 +1053,15 @@ private long evaluateComparator(Statistics stats, AnnotateStatsProcCtx aspCtx, E return Math.round(((double) (maxValue - value) / (maxValue - minValue)) * numRows); } } - } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME)) { - long value = Long.parseLong(boundValue); + } else if (colTypeLowerCase.equals(serdeConstants.BIGINT_TYPE_NAME) || + colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { + long value; + if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { + TimestampWritableV2 timestampWritable = new TimestampWritableV2(Timestamp.valueOf(boundValue)); + value = timestampWritable.getTimestamp().toEpochSecond(); + } else { + value = Long.parseLong(boundValue); + } long maxValue = cs.getRange().maxValue.longValue(); long minValue = cs.getRange().minValue.longValue(); if (upperBound) { @@ -1640,6 +1655,7 @@ private static void computeAggregateColumnMinMax(ColStatistics cs, HiveConf conf case serdeConstants.DATE_TYPE_NAME: case serdeConstants.INT_TYPE_NAME: case serdeConstants.BIGINT_TYPE_NAME: + case serdeConstants.TIMESTAMP_TYPE_NAME: long maxValueLong = range.maxValue.longValue(); long minValueLong = range.minValue.longValue(); // If min value is less or equal to max value (legal) @@ -2022,6 +2038,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, pred = jop.getConf().getResidualFilterExprs().get(0); } // evaluate filter expression and update statistics + final boolean uniformWithinRange = HiveConf.getBoolVar( + aspCtx.getConf(), 
HiveConf.ConfVars.HIVE_STATS_RANGE_SELECTIVITY_UNIFORM_DISTRIBUTION); newNumRows = evaluateExpression(stats, pred, aspCtx, jop.getSchema().getColumnNames(), jop, stats.getNumRows()); // update statistics based on column statistics. @@ -2115,6 +2133,8 @@ public Object process(Node nd, Stack stack, NodeProcessorCtx procCtx, pred = jop.getConf().getResidualFilterExprs().get(0); } // evaluate filter expression and update statistics + final boolean uniformWithinRange = HiveConf.getBoolVar( + aspCtx.getConf(), HiveConf.ConfVars.HIVE_STATS_RANGE_SELECTIVITY_UNIFORM_DISTRIBUTION); newNumRows = evaluateExpression(wcStats, pred, aspCtx, jop.getSchema().getColumnNames(), jop, wcStats.getNumRows()); // update only the basic statistics in the absence of column statistics diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java index 31c96826b0..e6926d3d18 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/ColumnStatisticsObjTranslator.java @@ -28,14 +28,17 @@ import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; import org.apache.hadoop.hive.metastore.api.Date; import org.apache.hadoop.hive.metastore.api.Decimal; +import org.apache.hadoop.hive.metastore.api.Timestamp; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; 
import org.apache.hadoop.hive.ql.metadata.HiveException; import org.apache.hadoop.hive.serde2.io.DateWritableV2; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.StructField; @@ -46,9 +49,15 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.HiveDecimalObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.StringObjectInspector; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; public class ColumnStatisticsObjTranslator { + private static transient final Logger LOG = LoggerFactory + .getLogger(ColumnStatisticsObjTranslator.class); + public static ColumnStatisticsObj readHiveStruct(String columnName, String columnType, StructField structField, Object values) throws HiveException { @@ -210,6 +219,26 @@ private static void unpackDateStats(ObjectInspector oi, Object o, String fName, } } + /** Copies one field of a computed-stats struct into the timestamp stats held by statsObj. fName selects the target: countnulls and numdistinctvalues are read through a LongObjectInspector; max and min are read as TimestampWritableV2 and stored as metastore Timestamps built from getSeconds(); ndvbitvector is read as a byte array through a BinaryObjectInspector. Any other field name is ignored without error. */ private static void unpackTimestampStats(ObjectInspector oi, Object o, String fName, ColumnStatisticsObj statsObj) { + if (fName.equals("countnulls")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumNulls(v); + } else if (fName.equals("numdistinctvalues")) { + long v = ((LongObjectInspector) oi).get(o); + statsObj.getStatsData().getTimestampStats().setNumDVs(v); + } else if (fName.equals("max")) { /* only the seconds value returned by getSeconds() is stored as the high bound */ + TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + statsObj.getStatsData().getTimestampStats().setHighValue(new Timestamp(v.getSeconds())); + } else if (fName.equals("min")) { /* same conversion as max, stored as the low bound */ + TimestampWritableV2 v = ((TimestampObjectInspector) oi).getPrimitiveWritableObject(o); + 
statsObj.getStatsData().getTimestampStats().setLowValue(new Timestamp(v.getSeconds())); + } else if (fName.equals("ndvbitvector")) { /* the bit vector bytes are passed to setBitVectors exactly as read */ + PrimitiveObjectInspector poi = (PrimitiveObjectInspector) oi; + byte[] buf = ((BinaryObjectInspector) poi).getPrimitiveJavaObject(o); + statsObj.getStatsData().getTimestampStats().setBitVectors(buf); + } + } + private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String fieldName, ColumnStatisticsObj statsObj) throws UnsupportedDoubleException { if (o == null) { return; @@ -248,6 +277,10 @@ private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String f DateColumnStatsDataInspector dateStats = new DateColumnStatsDataInspector(); statsData.setDateStats(dateStats); statsObj.setStatsData(statsData); + } else if (s.equalsIgnoreCase("timestamp")) { /* a timestamp column type starts from an empty TimestampColumnStatsDataInspector, matching the date branch above */ + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + statsData.setTimestampStats(timestampStats); + statsObj.setStatsData(statsData); } } else { // invoke the right unpack method depending on data type of the column @@ -265,6 +298,8 @@ private static void unpackPrimitiveObject(ObjectInspector oi, Object o, String f unpackDecimalStats(oi, o, fieldName, statsObj); } else if (statsObj.getStatsData().isSetDateStats()) { unpackDateStats(oi, o, fieldName, statsObj); + } else if (statsObj.getStatsData().isSetTimestampStats()) { + unpackTimestampStats(oi, o, fieldName, statsObj); } } } diff --git a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java index be527095c3..0e12f273ba 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java @@ -131,6 +131,10 @@ private static final int DATE_RANGE_LOWER_LIMIT = 10593; // Range upper limit for date type when not defined (days, heuristic): '2024-12-31' private static final int DATE_RANGE_UPPER_LIMIT = 20089; + // Range lower limit for 
timestamp type when not defined (seconds, heuristic): '1999-01-01 00:00:00' + private static final long TIMESTAMP_RANGE_LOWER_LIMIT = 915148800L; + // Range upper limit for timestamp type when not defined (seconds, heuristic): '2024-12-31 23:59:59' + private static final long TIMESTAMP_RANGE_UPPER_LIMIT = 1735689599L; /** * Collect table, partition and column level statistics @@ -860,8 +864,15 @@ public static ColStatistics getColStatistics(ColumnStatisticsObj cso, String tab } else if (colTypeLowerCase.equals(serdeConstants.BINARY_TYPE_NAME)) { cs.setAvgColLen(csd.getBinaryStats().getAvgColLen()); cs.setNumNulls(csd.getBinaryStats().getNumNulls()); - } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) || - colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) { + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + cs.setNumNulls(csd.getTimestampStats().getNumNulls()); + Long lowVal = (csd.getTimestampStats().getLowValue() != null) ? csd.getTimestampStats().getLowValue() + .getSecondsSinceEpoch() : null; + Long highVal = (csd.getTimestampStats().getHighValue() != null) ? 
csd.getTimestampStats().getHighValue() + .getSecondsSinceEpoch() : null; + cs.setRange(lowVal, highVal); + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); @@ -939,8 +950,11 @@ else if(colTypeLowerCase.equals(serdeConstants.SMALLINT_TYPE_NAME)){ cs.setNumTrues(Math.max(1, numRows/2)); cs.setNumFalses(Math.max(1, numRows/2)); cs.setAvgColLen(JavaDataModel.get().primitive1()); - } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME) || - colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) { + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMP_TYPE_NAME)) { + cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); + // epoch, seconds since epoch + cs.setRange(TIMESTAMP_RANGE_LOWER_LIMIT, TIMESTAMP_RANGE_UPPER_LIMIT); + } else if (colTypeLowerCase.equals(serdeConstants.TIMESTAMPLOCALTZ_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfTimestamp()); } else if (colTypeLowerCase.startsWith(serdeConstants.DECIMAL_TYPE_NAME)) { cs.setAvgColLen(JavaDataModel.get().lengthOfDecimal()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java index 642f42b5b1..4e7c598155 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDAFComputeStats.java @@ -31,6 +31,7 @@ import org.apache.hadoop.hive.ql.util.JavaDataModel; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.apache.hadoop.hive.serde2.io.DoubleWritable; +import org.apache.hadoop.hive.serde2.io.TimestampWritableV2; import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector; import 
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector; @@ -44,6 +45,7 @@ import org.apache.hadoop.hive.serde2.objectinspector.primitive.LongObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory; import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorUtils; +import org.apache.hadoop.hive.serde2.objectinspector.primitive.TimestampObjectInspector; import org.apache.hadoop.hive.serde2.objectinspector.primitive.WritableLongObjectInspector; import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo; import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; @@ -83,7 +85,6 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) case SHORT: case INT: case LONG: - case TIMESTAMP: case TIMESTAMPLOCALTZ: return new GenericUDAFLongStatsEvaluator(); case FLOAT: @@ -99,6 +100,8 @@ public GenericUDAFEvaluator getEvaluator(TypeInfo[] parameters) return new GenericUDAFDecimalStatsEvaluator(); case DATE: return new GenericUDAFDateStatsEvaluator(); + case TIMESTAMP: + return new GenericUDAFTimestampStatsEvaluator(); default: throw new UDFArgumentTypeException(0, "Only integer/long/timestamp/date/float/double/string/binary/boolean/decimal type argument " + @@ -381,6 +384,8 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc partialResult = new Object[6]; partialResult[0] = new Text(); + partialResult[1] = null; + partialResult[2] = null; partialResult[3] = new LongWritable(0); partialResult[4] = new BytesWritable(); @@ -405,6 +410,8 @@ public ObjectInspector init(Mode m, ObjectInspector[] parameters) throws HiveExc result = new Object[6]; result[0] = new Text(); + result[1] = null; + result[2] = null; result[3] = new LongWritable(0); result[4] = new LongWritable(0); result[5] = new BytesWritable(); @@ -1366,6 +1373,80 @@ public void reset(AggregationBuffer 
agg) throws HiveException { ((NumericStatsAgg)agg).reset("Date"); } } + + /** + * GenericUDAFTimestampStatsEvaluator + * High/low value will be saved in stats DB as long value representing seconds since epoch. + * Min and max are kept as private TimestampWritableV2 copies, and the distinct-value estimator + * is fed the getSeconds() result of each value. + * NOTE(review): values that differ only below one second presumably count as one distinct value - confirm against getSeconds(). + */ + public static class GenericUDAFTimestampStatsEvaluator + extends GenericUDAFNumericStatsEvaluator { + + /* Values are inspected through the writable timestamp object inspector from PrimitiveObjectInspectorFactory. */ @Override + protected TimestampObjectInspector getValueObjectInspector() { + return PrimitiveObjectInspectorFactory.writableTimestampObjectInspector; + } + + /* The type info argument is ignored; this overload returns the same inspector as the no-argument one. */ @Override + protected TimestampObjectInspector getValueObjectInspector(PrimitiveTypeInfo typeInfo) { + return getValueObjectInspector(); + } + + @AggregationType(estimable = true) + public class TimestampStatsAgg extends NumericStatsAgg { + /* Adds two primitive2-sized slots to the base estimate - presumably for the min and max fields; confirm against JavaDataModel. */ @Override + public int estimate() { + JavaDataModel model = JavaDataModel.get(); + return super.estimate() + model.primitive2() * 2; + } + + @Override + protected void update(Object p, PrimitiveObjectInspector inputOI) { + // TimestampWritableV2 is mutable, TimestampStatsAgg needs its own copy + TimestampWritableV2 v = new TimestampWritableV2((TimestampWritableV2) inputOI.getPrimitiveWritableObject(p)); + + //Update min counter if new value is less than min seen so far + if (min == null || v.compareTo(min) < 0) { + min = v; + } + //Update max counter if new value is greater than max seen so far + if (max == null || v.compareTo(max) > 0) { + max = v; + } + // Add value to NumDistinctValue Estimator + numDV.addToEstimator(v.getSeconds()); + } + + /* Merges a partial minimum: a null minValue is ignored; otherwise it replaces min when min is unset or compares greater. */ @Override + protected void updateMin(Object minValue, TimestampObjectInspector minFieldOI) { + if ((minValue != null) && (min == null || + min.compareTo(minFieldOI.getPrimitiveWritableObject(minValue)) > 0)) { + // TimestampWritableV2 is mutable, TimestampStatsAgg needs its own copy + min = new TimestampWritableV2(minFieldOI.getPrimitiveWritableObject(minValue)); + } + } + + /* Merges a partial maximum: a null maxValue is ignored; otherwise it replaces max when max is unset or compares smaller. */ @Override + protected void updateMax(Object maxValue, TimestampObjectInspector maxFieldOI) { + if ((maxValue != null) && (max == null || + 
max.compareTo(maxFieldOI.getPrimitiveWritableObject(maxValue)) < 0)) { + // TimestampWritableV2 is mutable, TimestampStatsAgg needs its own copy + max = new TimestampWritableV2(maxFieldOI.getPrimitiveWritableObject(maxValue)); + } + } + }; + + /* Creates a TimestampStatsAgg and initialises it through reset() before returning it. */ @Override + public AggregationBuffer getNewAggregationBuffer() throws HiveException { + AggregationBuffer result = new TimestampStatsAgg(); + reset(result); + return result; + } + + /* Delegates to NumericStatsAgg.reset, passing the Timestamp type label. */ @Override + public void reset(AggregationBuffer agg) throws HiveException { + ((NumericStatsAgg)agg).reset("Timestamp"); + } + } @InterfaceAudience.LimitedPrivate(value = { "Hive" }) static int lengthFor(JavaDataModel model, Integer numVector) { diff --git a/ql/src/test/queries/clientpositive/timestamp_comparison3.q b/ql/src/test/queries/clientpositive/timestamp_comparison3.q new file mode 100644 index 0000000000..145d89bba8 --- /dev/null +++ b/ql/src/test/queries/clientpositive/timestamp_comparison3.q @@ -0,0 +1,32 @@ +set hive.fetch.task.conversion=none; +set hive.stats.fetch.column.stats=true; + +create database timestamp_test_n123; +create table timestamp_test_n123.onecolumntable (ts timestamp); + +insert into timestamp_test_n123.onecolumntable values +('2015-01-01 00:00:00'), +('2015-01-02 00:00:00'), +('2015-01-03 00:00:00'), +('2015-01-04 00:00:00'), +('2015-01-05 00:00:00'); + +describe formatted timestamp_test_n123.onecolumntable ts; + +explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-02 00:00:00' as timestamp) + and ts <= cast('2015-01-04 00:00:00' as timestamp); + +explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-02 00:00:00' as timestamp) + and ts <= cast('2015-01-03 00:00:00' as timestamp); + +explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-01 00:00:00' as timestamp) + and ts <= cast('2015-01-08 00:00:00' as timestamp); + +drop table timestamp_test_n123.onecolumntable; +drop database timestamp_test_n123; diff --git 
a/ql/src/test/results/clientpositive/alter_table_update_status.q.out b/ql/src/test/results/clientpositive/alter_table_update_status.q.out index 9ffd9b413d..6747c3b470 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status.q.out @@ -453,8 +453,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type timestamp -min 1325379723 -max 1325379723 +min 2012-01-01 01:02:03 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 1 avg_col_len @@ -908,8 +908,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type timestamp -min 1325379723 -max 1325379723 +min 2012-01-01 01:02:03 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 1 avg_col_len @@ -935,8 +935,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type timestamp -min 1357030924 -max 1357030923 +min 2013-01-01 09:02:04 +max 2013-01-01 09:02:03 num_nulls 12 distinct_count 7 avg_col_len diff --git a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out index ec6a780b98..4fb4a70dc6 100644 --- a/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out +++ b/ql/src/test/results/clientpositive/alter_table_update_status_disable_bitvector.q.out @@ -453,8 +453,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats col_name ts data_type timestamp -min 1325379723 -max 1325379723 +min 2012-01-01 01:02:03 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 1 avg_col_len @@ -908,8 +908,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats col_name ts data_type timestamp -min 1325379723 -max 1325379723 +min 2012-01-01 01:02:03 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 1 avg_col_len @@ -935,8 +935,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: 
Input: default@datatype_stats col_name ts data_type timestamp -min 1357030924 -max 1357030923 +min 2013-01-01 09:02:04 +max 2013-01-01 09:02:03 num_nulls 12 distinct_count 7 avg_col_len diff --git a/ql/src/test/results/clientpositive/beeline/desc_table_formatted.q.out b/ql/src/test/results/clientpositive/beeline/desc_table_formatted.q.out index e9617449b8..2f7478d06d 100644 --- a/ql/src/test/results/clientpositive/beeline/desc_table_formatted.q.out +++ b/ql/src/test/results/clientpositive/beeline/desc_table_formatted.q.out @@ -222,8 +222,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type timestamp -min 1325379722 -max 1325379723 +min 2012-01-01 01:02:02 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 2 avg_col_len @@ -585,7 +585,7 @@ PREHOOK: Input: default@datatype_stats_n0 POSTHOOK: query: DESC FORMATTED datatype_stats_n0 ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 -{"columns":[{"name":"ts","type":"timestamp","comment":"from deserializer","min":1325379722,"max":1325379723,"numNulls":1,"distinctCount":2}]} +{"columns":[{"name":"ts","type":"timestamp","comment":"from deserializer","min":"2012-01-01 01:02:02","max":"2012-01-01 01:02:03","numNulls":1,"distinctCount":2}]} PREHOOK: query: DESC FORMATTED datatype_stats_n0 dt PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats_n0 diff --git a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out index f259a1ef4f..cdd934c2fd 100644 --- a/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out +++ b/ql/src/test/results/clientpositive/confirm_initial_tbl_stats.q.out @@ -272,8 +272,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@alltypesorc col_name ctimestamp1 data_type timestamp -min -28830 -max -28769 +min 1969-12-31 15:59:30 +max 1969-12-31 16:00:31 num_nulls 3115 distinct_count 35 avg_col_len diff --git 
a/ql/src/test/results/clientpositive/constprog_type.q.out b/ql/src/test/results/clientpositive/constprog_type.q.out index 983d5ba172..159abf9cb2 100644 --- a/ql/src/test/results/clientpositive/constprog_type.q.out +++ b/ql/src/test/results/clientpositive/constprog_type.q.out @@ -59,20 +59,20 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 944 Basic stats: COMPLETE Column stats: COMPLETE - value expressions: _col0 (type: struct), _col1 (type: struct) + Statistics: Num rows: 1 Data size: 1008 Basic stats: COMPLETE Column stats: COMPLETE + value expressions: _col0 (type: struct), _col1 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1) mode: mergepartial outputColumnNames: _col0, _col1 - Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 976 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1 Data size: 1040 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/desc_table_formatted.q.out b/ql/src/test/results/clientpositive/desc_table_formatted.q.out index 0a5c3633ad..901484497f 100644 --- a/ql/src/test/results/clientpositive/desc_table_formatted.q.out +++ b/ql/src/test/results/clientpositive/desc_table_formatted.q.out @@ -222,8 +222,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type 
timestamp -min 1325379722 -max 1325379723 +min 2012-01-01 01:02:02 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 2 avg_col_len @@ -585,7 +585,7 @@ PREHOOK: Input: default@datatype_stats_n0 POSTHOOK: query: DESC FORMATTED datatype_stats_n0 ts POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 -{"columns":[{"name":"ts","type":"timestamp","comment":"from deserializer","min":1325379722,"max":1325379723,"numNulls":1,"distinctCount":2}]} +{"columns":[{"name":"ts","type":"timestamp","comment":"from deserializer","min":"2012-01-01 01:02:02","max":"2012-01-01 01:02:03","numNulls":1,"distinctCount":2}]} PREHOOK: query: DESC FORMATTED datatype_stats_n0 dt PREHOOK: type: DESCTABLE PREHOOK: Input: default@datatype_stats_n0 diff --git a/ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out b/ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out index 09636a4153..afc876b2c1 100644 --- a/ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out +++ b/ql/src/test/results/clientpositive/druid/druidmini_dynamic_partition.q.out @@ -125,23 +125,23 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ctimestamp1 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ctimestamp1 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1849230 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctimestamp1 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: 
boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0D / rand())) % 6) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) sort order: ++ Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -151,11 +151,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: 
string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat @@ -364,23 +364,23 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ctimestamp2 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ctimestamp2 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 1849380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctimestamp2 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 1942340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0D / rand())) % 6) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 1942340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) sort order: ++ Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 1942340 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -390,11 +390,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), 
VALUE._col4 (type: float), VALUE._col5 (type: tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 1942340 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9174 Data size: 1942340 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat @@ -505,23 +505,23 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ctimestamp1 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ctimestamp1 is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1849230 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctimestamp1 AS timestamp with local time zone) (type: timestamp with local time zone), cstring1 (type: string), cstring2 (type: string), cdouble (type: double), cfloat (type: float), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, 
_col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean), floor_hour(CAST( GenericUDFEpochMilli(_col0) AS TIMESTAMP)) (type: timestamp), (floor((1.0D / rand())) % 6) (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) sort order: ++ Map-reduce partition columns: __time_granularity (type: timestamp), __druid_extra_partition_key (type: bigint) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: timestamp with local time zone), _col1 (type: string), _col2 (type: string), _col3 (type: double), _col4 (type: float), _col5 (type: tinyint), _col6 (type: smallint), _col7 (type: int), _col8 (type: bigint), _col9 (type: boolean), _col10 (type: boolean) Execution mode: llap LLAP IO: all inputs @@ -531,11 +531,11 @@ STAGE PLANS: Select Operator expressions: VALUE._col0 (type: timestamp with local time zone), VALUE._col1 (type: string), VALUE._col2 (type: string), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: 
tinyint), VALUE._col6 (type: smallint), VALUE._col7 (type: int), VALUE._col8 (type: bigint), VALUE._col9 (type: boolean), VALUE._col10 (type: boolean), KEY.__time_granularity (type: timestamp), KEY.__druid_extra_partition_key (type: bigint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, __time_granularity, __druid_extra_partition_key - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false Dp Sort State: PARTITION_SORTED - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 1942190 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.hive.druid.io.DruidQueryBasedInputFormat output format: org.apache.hadoop.hive.druid.io.DruidOutputFormat diff --git a/ql/src/test/results/clientpositive/foldts.q.out b/ql/src/test/results/clientpositive/foldts.q.out index d759de86f8..feda88c156 100644 --- a/ql/src/test/results/clientpositive/foldts.q.out +++ b/ql/src/test/results/clientpositive/foldts.q.out @@ -18,11 +18,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), to_unix_timestamp(ctimestamp1) (type: bigint), to_unix_timestamp(ctimestamp1) (type: bigint) outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 12288 Data size: 688128 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 563568 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE @@ -80,7 +80,7 
@@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) outputColumnNames: _col0 @@ -132,7 +132,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: from_unixtime(to_unix_timestamp(ctimestamp1), 'EEEE') (type: string) outputColumnNames: _col0 diff --git a/ql/src/test/results/clientpositive/interval_arithmetic.q.out b/ql/src/test/results/clientpositive/interval_arithmetic.q.out index 9d9aef4286..3b5db9b1ed 100644 --- a/ql/src/test/results/clientpositive/interval_arithmetic.q.out +++ b/ql/src/test/results/clientpositive/interval_arithmetic.q.out @@ -222,11 +222,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1_n0 - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: tsval (type: timestamp), (tsval - INTERVAL'2-2') (type: timestamp), (tsval - INTERVAL'-2-2') (type: timestamp), (tsval + INTERVAL'2-2') (type: timestamp), (tsval + INTERVAL'-2-2') (type: timestamp), (INTERVAL'-2-2' + tsval) (type: timestamp), (INTERVAL'2-2' + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 3440640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3316080 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 560 Basic stats: 
COMPLETE Column stats: COMPLETE @@ -452,11 +452,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1_n0 - Statistics: Num rows: 12288 Data size: 1005264 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 880704 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: dateval (type: date), tsval (type: timestamp), (dateval - tsval) (type: interval_day_time), (tsval - dateval) (type: interval_day_time), (tsval - tsval) (type: interval_day_time) outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 12288 Data size: 1447632 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1323072 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 264 Basic stats: COMPLETE Column stats: COMPLETE @@ -537,11 +537,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: interval_arithmetic_1_n0 - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: tsval (type: timestamp), (tsval - INTERVAL'99 11:22:33.123456789') (type: timestamp), (tsval - INTERVAL'-99 11:22:33.123456789') (type: timestamp), (tsval + INTERVAL'99 11:22:33.123456789') (type: timestamp), (tsval + INTERVAL'-99 11:22:33.123456789') (type: timestamp), (INTERVAL'-99 11:22:33.123456789' + tsval) (type: timestamp), (INTERVAL'99 11:22:33.123456789' + tsval) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 12288 Data size: 3440640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 3316080 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 2 Statistics: Num rows: 2 Data size: 560 Basic stats: COMPLETE Column stats: COMPLETE diff --git 
a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out index e91926626f..258e5f3b1a 100644 --- a/ql/src/test/results/clientpositive/llap/explainuser_4.q.out +++ b/ql/src/test/results/clientpositive/llap/explainuser_4.q.out @@ -30,29 +30,29 @@ Stage-0 Stage-1 Reducer 3 vectorized, llap File Output Operator [FS_35] - Select Operator [SEL_34] (rows=4626 width=552) + Select Operator [SEL_34] (rows=4626 width=528) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_27] (rows=4626 width=552) + Merge Join Operator [MERGEJOIN_27] (rows=4626 width=528) Conds:RS_30._col2=RS_33._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_30] PartitionCols:_col2 - Select Operator [SEL_29] (rows=3078 width=251) + Select Operator [SEL_29] (rows=3078 width=231) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_28] (rows=3078 width=251) + Filter Operator [FIL_28] (rows=3078 width=231) predicate:cint BETWEEN 1000000 AND 3000000 - TableScan [TS_0] (rows=12288 width=251) + TableScan [TS_0] (rows=12288 width=231) default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] <-Map 4 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_33] PartitionCols:_col2 - Select Operator [SEL_32] (rows=2298 width=251) + Select Operator [SEL_32] (rows=2298 width=231) 
Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_31] (rows=2298 width=251) + Filter Operator [FIL_31] (rows=2298 width=231) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) - TableScan [TS_3] (rows=12288 width=251) + TableScan [TS_3] (rows=12288 width=231) default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] PREHOOK: query: select diff --git a/ql/src/test/results/clientpositive/llap/orc_llap.q.out b/ql/src/test/results/clientpositive/llap/orc_llap.q.out index 0ad9682eb8..6a0d85550a 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap.q.out @@ -344,14 +344,14 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 122880 Data size: 30929630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 122880 Data size: 28438110 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 45873 Data size: 11546630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45873 Data size: 10616550 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 45873 Data size: 11546630 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45873 Data size: 10616550 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) minReductionHashAggr: 0.99 @@ -828,14 +828,14 @@ STAGE PLANS: TableScan alias: orc_llap filterExpr: ((cint > 10) and cbigint is not null) 
(type: boolean) - Statistics: Num rows: 245760 Data size: 61859030 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 245760 Data size: 56875910 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((cint > 10) and cbigint is not null) (type: boolean) - Statistics: Num rows: 91747 Data size: 23093340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 91747 Data size: 21233100 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: hash(ctinyint,csmallint,cint,cbigint,cfloat,cdouble,cstring1,cstring2,ctimestamp1,ctimestamp2,cboolean1,cboolean2) (type: int) outputColumnNames: _col0 - Statistics: Num rows: 91747 Data size: 23093340 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 91747 Data size: 21233100 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0) minReductionHashAggr: 0.99 diff --git a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out index 472b310b1b..d656c3ca7e 100644 --- a/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_llap_nonvector.q.out @@ -88,17 +88,17 @@ STAGE PLANS: Map Operator Tree: TableScan alias: orc_llap_nonvector - Statistics: Num rows: 12288 Data size: 3191474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), rdm (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - 
Statistics: Num rows: 12288 Data size: 3191474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2942394 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 100 - Statistics: Num rows: 100 Data size: 26280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 100 Data size: 26280 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 100 Data size: 24360 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out index 6790cd7fff..4924b8a4d3 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge5.q.out @@ -74,11 +74,11 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -88,10 +88,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), 
compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -209,11 +209,11 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -223,10 +223,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + 
Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out index 7021220588..9cab0156a0 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge6.q.out @@ -81,7 +81,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -94,7 +94,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -269,7 +269,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), 
_col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -282,7 +282,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out index 16bcc5a3da..f60bf9d306 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge7.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge7.q.out @@ -69,7 +69,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col5 (type: double) sort order: + @@ -88,7 +88,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 
(type: struct), _col5 (type: struct), _col0 (type: double) + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -304,7 +304,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col5 (type: double) sort order: + @@ -323,7 +323,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out index b299d1e172..d918e18492 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat1.q.out @@ -74,11 +74,11 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 
Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Execution mode: llap LLAP IO: all inputs Reducer 2 @@ -88,10 +88,10 @@ STAGE PLANS: aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out index 2d2787d5b9..d9701341e2 100644 --- a/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/llap/orc_merge_incompat2.q.out @@ -69,7 +69,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 
(type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) Reduce Output Operator key expressions: _col5 (type: double) sort order: + @@ -88,7 +88,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out index 7b1de5793a..c339430e0e 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_2.q.out @@ -47,19 +47,19 @@ STAGE PLANS: TableScan alias: a filterExpr: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 4615 Data size: 1161780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1068260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat 
(type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -102,12 +102,12 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 
(type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -119,11 +119,11 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 4 Execution mode: vectorized, llap @@ -131,10 +131,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 
Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -252,19 +252,19 @@ STAGE PLANS: TableScan alias: a filterExpr: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 4615 Data size: 1161780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1068260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), 
_col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -307,12 +307,12 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -324,11 +324,11 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 
(type: boolean), _col11 (type: boolean) Reducer 4 Execution mode: vectorized, llap @@ -336,10 +336,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -457,19 +457,19 @@ STAGE PLANS: TableScan alias: a filterExpr: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 4615 Data size: 1161780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1068260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: 
string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -512,12 +512,12 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), 
_col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -529,11 +529,11 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 4 Execution mode: vectorized, llap @@ -541,10 +541,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out index 4866a42b7d..4fa69f69ff 100644 --- a/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_dynpart_hashjoin_3.q.out @@ -33,11 +33,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), (cint < 100) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 12288 Data size: 3142322 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2893242 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE @@ -149,11 +149,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: 
timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), (cint < 100) (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 12288 Data size: 3142322 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2893242 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE @@ -258,11 +258,11 @@ STAGE PLANS: Map Operator Tree: TableScan alias: a - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 1 Statistics: Num rows: 1 Data size: 310 Basic stats: COMPLETE Column stats: COMPLETE diff --git a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out index 7b1de5793a..c339430e0e 100644 --- a/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out +++ b/ql/src/test/results/clientpositive/llap/tez_vector_dynpart_hashjoin_2.q.out @@ -47,19 +47,19 @@ STAGE PLANS: TableScan alias: a filterExpr: ((csmallint < 100S) and 
UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 4615 Data size: 1161780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1068260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -102,12 +102,12 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, 
_col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -119,11 +119,11 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 4 Execution mode: vectorized, llap @@ -131,10 +131,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 
(type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -252,19 +252,19 @@ STAGE PLANS: TableScan alias: a filterExpr: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 4615 Data size: 1161780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1068260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num 
rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -307,12 +307,12 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -324,11 +324,11 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - 
Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 4 Execution mode: vectorized, llap @@ -336,10 +336,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -457,19 +457,19 @@ STAGE PLANS: TableScan alias: a filterExpr: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num 
rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((csmallint < 100S) and UDFToInteger(csmallint) is not null) (type: boolean) - Statistics: Num rows: 4615 Data size: 1161780 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1068260 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean), UDFToInteger(csmallint) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 4615 Data size: 1175564 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4615 Data size: 1082044 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs @@ -512,12 +512,12 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1084 
Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col12 (type: int) sort order: + Map-reduce partition columns: _col12 (type: int) - Statistics: Num rows: 1084 Data size: 86954 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1084 Data size: 314 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col2 (type: int), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 3 Execution mode: llap @@ -529,11 +529,11 @@ STAGE PLANS: 0 _col12 (type: int) 1 _col0 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col1 (type: smallint), _col0 (type: tinyint), _col2 (type: int) sort order: +++ - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Reducer 4 Execution mode: vectorized, llap @@ -541,10 +541,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey0 (type: smallint), KEY.reducesinkkey2 (type: int), VALUE._col0 (type: bigint), VALUE._col1 (type: float), VALUE._col2 (type: double), VALUE._col3 (type: string), 
VALUE._col4 (type: string), VALUE._col5 (type: timestamp), VALUE._col6 (type: timestamp), VALUE._col7 (type: boolean), VALUE._col8 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1715 Data size: 282560 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1715 Data size: 195920 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out b/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out index 7ba6b4bfb8..ef516b5d0a 100644 --- a/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_aggregate_9.q.out @@ -419,7 +419,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n4 - Statistics: Num rows: 2000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 75760 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 5:d:double, 6:dc:decimal(38,18), 7:bo:boolean, 8:s:string, 9:s2:string, 10:ts:timestamp, 11:ts2:timestamp, 12:dt:date, 13:ROW__ID:struct] @@ -430,7 +430,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [10] - Statistics: Num rows: 2000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 75760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ts), max(ts), 
sum(ts), count(ts) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out index 3d70e15c73..9573b3db77 100644 --- a/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_coalesce.q.out @@ -347,7 +347,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8:timestamp), SelectColumnIsNotNull(col 9:timestamp)) + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 9:timestamp), SelectColumnIsNotNull(col 8:timestamp)) Select Vectorization: className: VectorSelectOperator native: true diff --git a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out index c30bd079fe..bd6cd8cb60 100644 --- a/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_complex_join.q.out @@ -51,7 +51,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -60,7 +60,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2:int) predicate: cint is not null (type: boolean) - Statistics: Num rows: 9173 Data size: 2309110 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 2123190 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: 
string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -68,7 +68,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 9173 Data size: 2309110 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9173 Data size: 2123190 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -83,13 +83,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 input vertices: 1 Map 2 - Statistics: Num rows: 10090 Data size: 2540021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10090 Data size: 2335509 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10090 Data size: 2540021 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 10090 Data size: 2335509 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out index f558cfc120..6153137539 100644 --- a/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_decimal_cast.q.out @@ -23,8 +23,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 638316 Basic stats: 
COMPLETE Column stats: COMPLETE + filterExpr: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 513756 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -32,9 +32,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 5:double), SelectColumnIsNotNull(col 8:timestamp)) - predicate: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 8:timestamp), SelectColumnIsNotNull(col 5:double)) + predicate: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 3816 Data size: 159600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -43,19 +43,19 @@ STAGE PLANS: native: true 
projectedOutputColumnNums: [5, 2, 10, 8, 13, 14, 15, 16] selectExpressions: CastDoubleToDecimal(col 5:double) -> 13:decimal(20,10), CastLongToDecimal(col 2:int) -> 14:decimal(23,14), CastLongToDecimal(col 10:boolean) -> 15:decimal(5,2), CastTimestampToDecimal(col 8:timestamp) -> 16:decimal(15,0) - Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3816 Data size: 1760976 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -151,8 +151,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_small - filterExpr: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 638316 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 513756 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 12:ROW__ID:struct] @@ -160,9 +160,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 5:double), SelectColumnIsNotNull(col 8:timestamp)) - predicate: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 8:timestamp), SelectColumnIsNotNull(col 5:double)) + predicate: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 3816 Data size: 159600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -171,19 +171,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 2, 10, 8, 13, 14, 15, 16] selectExpressions: CastDoubleToDecimal(col 5:double) -> 13:decimal(20,10), CastLongToDecimal(col 2:int) -> 14:decimal(23,14), CastLongToDecimal(col 10:boolean) -> 15:decimal(5,2), CastTimestampToDecimal(col 8:timestamp) -> 16:decimal(15,0) - Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3816 Data size: 1760976 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: 
VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out index 479d13987b..30c867ed45 100644 --- a/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_interval_mapjoin.q.out @@ -207,7 +207,7 @@ STAGE PLANS: TableScan alias: vectortab_a_1korc filterExpr: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 187480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185480 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -216,7 +216,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 12:date), SelectColumnIsNotNull(col 14:date)(children: CastTimestampToDate(col 10:timestamp) -> 14:date), SelectColumnIsNotNull(col 8:string)) predicate: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 954 Data size: 178852 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 905 Data size: 167854 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) 
(type: interval_day_time) outputColumnNames: _col0, _col1 @@ -225,7 +225,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 16] selectExpressions: DateColSubtractDateColumn(col 12:date, col 15:date)(children: CastTimestampToDate(col 10:timestamp) -> 15:date) -> 16:interval_day_time - Statistics: Num rows: 954 Data size: 101124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 905 Data size: 95930 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -240,7 +240,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2 input vertices: 1 Map 2 - Statistics: Num rows: 34600 Data size: 6920000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32218 Data size: 6443600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -248,13 +248,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [8, 8, 16] - Statistics: Num rows: 34600 Data size: 6920000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32218 Data size: 6443600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 34600 Data size: 6920000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32218 Data size: 6443600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -275,7 +275,7 @@ STAGE PLANS: TableScan alias: vectortab_b_1korc filterExpr: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 186864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 
184664 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -284,7 +284,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 12:date), SelectColumnIsNotNull(col 14:date)(children: CastTimestampToDate(col 10:timestamp) -> 14:date), SelectColumnIsNotNull(col 8:string)) predicate: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 943 Data size: 176202 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 164340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 @@ -293,7 +293,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 16] selectExpressions: DateColSubtractDateColumn(col 12:date, col 15:date)(children: CastTimestampToDate(col 10:timestamp) -> 15:date) -> 16:interval_day_time - Statistics: Num rows: 943 Data size: 99958 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 94340 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: interval_day_time) sort order: ++ @@ -302,7 +302,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 943 Data size: 99958 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 94340 Basic stats: COMPLETE Column stats: COMPLETE Execution mode: vectorized, llap LLAP IO: all inputs Map Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out 
b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out index 97814adcdf..a8e20bbc58 100644 --- a/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_outer_join1.q.out @@ -252,7 +252,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 3745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15 Data size: 3545 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -263,7 +263,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 15 Data size: 3745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15 Data size: 3545 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -284,13 +284,13 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 input vertices: 1 Map 2 - Statistics: Num rows: 33 Data size: 14459 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 33 Data size: 14099 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 33 Data size: 14459 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 33 Data size: 14099 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output 
format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -317,7 +317,7 @@ STAGE PLANS: TableScan alias: cd filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 15 Data size: 3745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15 Data size: 3545 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -327,7 +327,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsNotNull(col 2:int) predicate: cint is not null (type: boolean) - Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2520 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -335,7 +335,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2520 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: int) sort order: + @@ -346,7 +346,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF 
TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 0:tinyint, 1:smallint, 3:bigint, 4:float, 5:double, 6:string, 7:string, 8:timestamp, 9:timestamp, 10:boolean, 11:boolean - Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2520 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col0 (type: tinyint), _col1 (type: smallint), _col3 (type: bigint), _col4 (type: float), _col5 (type: double), _col6 (type: string), _col7 (type: string), _col8 (type: timestamp), _col9 (type: timestamp), _col10 (type: boolean), _col11 (type: boolean) Execution mode: vectorized, llap LLAP IO: all inputs diff --git a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out index fb3c01269d..a9cf533810 100644 --- a/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out +++ b/ql/src/test/results/clientpositive/llap/vector_reuse_scratchcols.q.out @@ -97,7 +97,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ 
-107,7 +107,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), FilterDoubleColGreaterDoubleScalar(col 14:double, val -5.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDoubleColNotEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 15:double)), FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 16:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 16:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val a), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 17:decimal(13,3)), FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)), FilterLongColEqualLongScalar(col 3:bigint, val 762), FilterStringGroupColEqualStringScalar(col 6:string, val a)) predicate: (((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) - Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5465 Data size: 1101870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -116,7 +116,7 @@ STAGE PLANS: 
native: true projectedOutputColumnNums: [2, 5, 1, 4, 0, 18, 21, 22, 25] selectExpressions: CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double, CastLongToDouble(col 1:smallint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 1:smallint) -> 23:double, CastLongToDouble(col 1:smallint) -> 24:double) -> 25:double - Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5465 Data size: 1101870 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4) Group By Vectorization: @@ -309,7 +309,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -319,7 +319,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 4:float)(children: 
CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), FilterDoubleColGreaterDoubleScalar(col 14:double, val -5.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDoubleColNotEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 15:double)), FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 16:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 16:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val a), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 17:decimal(13,3)), FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)), FilterLongColEqualLongScalar(col 3:bigint, val 762), FilterStringGroupColEqualStringScalar(col 6:string, val a)) predicate: (((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) - Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5465 Data size: 1101870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -328,7 +328,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 5, 1, 4, 0, 18, 21, 22, 25] selectExpressions: CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 
20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double, CastLongToDouble(col 1:smallint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 1:smallint) -> 23:double, CastLongToDouble(col 1:smallint) -> 24:double) -> 25:double - Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5465 Data size: 1101870 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out index 56faf2c68d..824d190dc0 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_0.q.out @@ -30102,7 +30102,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false @@ -30227,7 +30227,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false @@ -30352,7 +30352,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cint 
= 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false diff --git a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out index 67879495e5..761e1f5228 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_10.q.out @@ -68,7 +68,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -78,7 +78,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(6,2), val -5638.15)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(6,2))), FilterExprAndExpr(children: 
FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(11,4)), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -87,13 +87,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 
21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2434654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2434654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out index ff03d60da4..b50cab1c16 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_11.q.out @@ -50,7 +50,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2381474 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -60,7 +60,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string)) predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1190792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -69,13 +69,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 10, 5, 8, 13, 14, 15, 17, 18] selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1:int)(children: col 1:smallint) -> 13:int, DoubleColSubtractDoubleScalar(col 5:double, val 9763215.5639) -> 14:double, DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleScalar(col 16:double, val 6981.0)(children: DoubleColUnaryMinus(col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleScalar(col 5:double, val -5638.15) -> 18:double - Statistics: Num rows: 6144 
Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1116736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1116736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out index 620bc71291..c7ba1d7b39 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_12.q.out @@ -88,7 +88,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 12288 Data size: 1647554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -98,7 +98,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 
3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -107,7 +107,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 17] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double - Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: @@ -122,7 +122,7 @@ STAGE PLANS: minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 
1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -133,7 +133,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 4:bigint, 5:double, 6:double, 7:double, 8:bigint, 9:bigint, 10:double - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -181,7 +181,7 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS 
decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 @@ -190,7 +190,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 2, 0, 11, 12, 4, 13, 21, 23, 26, 27, 29, 31, 9, 34, 38, 43, 48] selectExpressions: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 11:double, LongColUnaryMinus(col 1:bigint) -> 12:bigint, LongColMultiplyLongColumn(col 1:bigint, col 4:bigint) -> 13:bigint, FuncPowerDoubleToDouble(col 20:double)(children: DoubleColDivideLongColumn(col 16:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 15:double)(children: DoubleColDivideLongColumn(col 14:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 14:double) -> 15:double) -> 16:double, IfExprNullCondExpr(col 17:boolean, null, col 18:bigint)(children: LongColEqualLongScalar(col 4:bigint, val 1) -> 17:boolean, LongColSubtractLongScalar(col 4:bigint, val 1) -> 18:bigint) -> 19:bigint) -> 20:double) -> 21:double, DoubleColDivideDoubleScalar(col 22:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 22:double) -> 23:double, DoubleColUnaryMinus(col 25:double)(children: DoubleColDivideDoubleScalar(col 24:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 24:double) -> 25:double) -> 26:double, DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 27:double, DoubleColUnaryMinus(col 28:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 28:double) -> 29:double, 
DecimalScalarAddDecimalColumn(val -5638.15, col 30:decimal(19,0))(children: CastLongToDecimal(col 1:bigint) -> 30:decimal(19,0)) -> 31:decimal(22,2), DoubleColDivideDoubleColumn(col 32:double, col 33:double)(children: DoubleColDivideLongColumn(col 7:double, col 8:bigint) -> 32:double, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 33:double) -> 34:double, DoubleColUnaryMinus(col 37:double)(children: DoubleColUnaryMinus(col 36:double)(children: DoubleColDivideDoubleScalar(col 35:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 35:double) -> 36:double) -> 37:double) -> 38:double, DoubleColAddDoubleColumn(col 40:double, col 42:double)(children: DoubleColDivideDoubleScalar(col 39:double, val -6432.0)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 39:double) -> 40:double, DoubleColUnaryMinus(col 41:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 0:double) -> 41:double) -> 42:double) -> 43:double, FuncPowerDoubleToDouble(col 47:double)(children: DoubleColDivideLongColumn(col 46:double, col 8:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 45:double)(children: DoubleColDivideLongColumn(col 44:double, col 8:bigint)(children: DoubleColMultiplyDoubleColumn(col 7:double, col 7:double) -> 44:double) -> 45:double) -> 46:double) -> 47:double) -> 48:double - Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603604 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col3 (type: double), _col0 (type: bigint), _col2 (type: string) sort order: +++ @@ -200,7 +200,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 
3:boolean, 11:double, 12:bigint, 4:bigint, 13:bigint, 21:double, 23:double, 26:double, 27:double, 29:double, 31:decimal(22,2), 9:bigint, 34:double, 38:double, 43:double, 48:double - Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603604 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -226,13 +226,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 3, 2, 19, 0, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 11, 16, 17, 18] selectExpressions: ConstantVectorExpression(val null) -> 19:timestamp - Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603644 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603644 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out index e1d303204e..6af2b75ad5 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_13.q.out @@ -90,7 +90,7 @@ STAGE PLANS: TableScan alias: alltypesorc 
filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1779902 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -100,7 +100,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4)))) predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: 
timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -109,7 +109,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -121,10 +121,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -135,7 +135,7 @@ STAGE PLANS: native: true 
nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 5:tinyint, 6:double, 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:bigint, 13:float, 14:tinyint - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -183,7 +183,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 346 Data size: 64822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: 
double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 @@ -192,7 +192,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 21, 22, 23, 28, 29, 34, 38, 40, 43, 13, 49, 14] selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 20:double)(children: CastLongToDouble(col 19:tinyint)(children: LongColAddLongColumn(col 18:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 18:tinyint) -> 19:tinyint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 6:double) -> 22:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 23:float, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 26:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double, DoubleColUnaryMinus(col 6:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DecimalColSubtractDecimalScalar(col 37:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 
36:tinyint)(children: LongColAddLongColumn(col 35:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 35:tinyint) -> 36:tinyint) -> 37:decimal(3,0)) -> 38:decimal(7,3), DoubleColUnaryMinus(col 39:double)(children: DoubleColUnaryMinus(col 6:double) -> 39:double) -> 40:double, DoubleScalarDivideDoubleColumn(val -26.28, col 42:double)(children: DoubleColUnaryMinus(col 41:double)(children: DoubleColUnaryMinus(col 6:double) -> 41:double) -> 42:double) -> 43:double, DoubleColDivideDoubleColumn(col 47:double, col 48:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 46:double)(children: CastLongToDouble(col 45:tinyint)(children: LongColAddLongColumn(col 44:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 44:tinyint) -> 45:tinyint) -> 46:double) -> 47:double, CastLongToDouble(col 1:tinyint) -> 48:double) -> 49:double - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ @@ -201,7 +201,7 @@ STAGE PLANS: keyColumns: 0:boolean, 1:tinyint, 2:timestamp, 3:float, 4:string, 15:tinyint, 5:tinyint, 17:tinyint, 6:double, 21:double, 22:double, 23:float, 28:double, 29:double, 34:double, 38:decimal(7,3), 40:double, 43:double, 13:float, 49:double, 14:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, 
hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -226,19 +226,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -450,7 +450,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1779902 Basic stats: COMPLETE Column stats: COMPLETE TableScan 
Vectorization: native: true Filter Operator @@ -459,7 +459,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4)))) predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -468,7 +468,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: 
CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -480,10 +480,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -492,7 +492,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: 
float), _col14 (type: tinyint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -527,7 +527,7 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 346 Data size: 64822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 @@ -536,7 +536,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 15, 5, 17, 6, 21, 22, 23, 28, 29, 34, 38, 40, 43, 13, 49, 14] selectExpressions: LongColUnaryMinus(col 1:tinyint) -> 15:tinyint, LongColAddLongColumn(col 16:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 16:tinyint) -> 17:tinyint, DoubleColMultiplyDoubleColumn(col 6:double, col 20:double)(children: 
CastLongToDouble(col 19:tinyint)(children: LongColAddLongColumn(col 18:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 18:tinyint) -> 19:tinyint) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 6:double) -> 22:double, DoubleScalarMultiplyDoubleColumn(val 79.5530014038086, col 3:float) -> 23:float, FuncPowerDoubleToDouble(col 27:double)(children: DoubleColDivideLongColumn(col 26:double, col 9:bigint)(children: DoubleColSubtractDoubleColumn(col 7:double, col 25:double)(children: DoubleColDivideLongColumn(col 24:double, col 9:bigint)(children: DoubleColMultiplyDoubleColumn(col 8:double, col 8:double) -> 24:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double, DoubleColUnaryMinus(col 6:double) -> 29:double, FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 32:double, col 12:bigint)(children: DoubleColSubtractDoubleColumn(col 10:double, col 31:double)(children: DoubleColDivideLongColumn(col 30:double, col 12:bigint)(children: DoubleColMultiplyDoubleColumn(col 11:double, col 11:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double, DecimalColSubtractDecimalScalar(col 37:decimal(3,0), val 10.175)(children: CastLongToDecimal(col 36:tinyint)(children: LongColAddLongColumn(col 35:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 35:tinyint) -> 36:tinyint) -> 37:decimal(3,0)) -> 38:decimal(7,3), DoubleColUnaryMinus(col 39:double)(children: DoubleColUnaryMinus(col 6:double) -> 39:double) -> 40:double, DoubleScalarDivideDoubleColumn(val -26.28, col 42:double)(children: DoubleColUnaryMinus(col 41:double)(children: DoubleColUnaryMinus(col 6:double) -> 41:double) -> 42:double) -> 43:double, DoubleColDivideDoubleColumn(col 47:double, col 48:double)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 46:double)(children: CastLongToDouble(col 45:tinyint)(children: LongColAddLongColumn(col 44:tinyint, col 5:tinyint)(children: LongColUnaryMinus(col 1:tinyint) -> 44:tinyint) 
-> 45:tinyint) -> 46:double) -> 47:double, CastLongToDouble(col 1:tinyint) -> 48:double) -> 49:double - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), _col5 (type: tinyint), _col6 (type: tinyint), _col7 (type: tinyint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: decimal(7,3)), _col16 (type: double), _col17 (type: double), _col18 (type: float), _col19 (type: double), _col20 (type: tinyint) sort order: +++++++++++++++++++++ @@ -544,7 +544,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -562,19 +562,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 10, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out index 1f7084acd3..ff5d42e8ca 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_14.q.out @@ -90,7 +90,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1889990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -100,7 +100,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val 
-257), FilterDoubleColLessDoubleColumn(col 4:float, col 14:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float)), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 15:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 15:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp))) predicate: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean) - Statistics: Num rows: 758 Data size: 132082 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 758 Data size: 116802 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -109,7 +109,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 4, 6, 10, 5, 17, 22, 4, 23] selectExpressions: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 20:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 23:double - Statistics: Num rows: 758 Data size: 132082 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 758 Data size: 116802 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: @@ -121,10 +121,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6] keys: _col2 (type: string), _col1 (type: float), _col4 (type: double), _col0 (type: timestamp), _col3 (type: boolean) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 379 Data size: 66108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -135,7 +135,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 5:double, 6:double, 7:bigint, 8:float, 9:double, 10:double, 11:bigint - Statistics: Num rows: 379 Data size: 66108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -183,7 +183,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 189 Data size: 33008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 @@ -192,7 +192,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 1, 0, 4, 2, 12, 14, 22, 23, 8, 24, 25, 28, 33, 11, 37, 46, 47, 51, 56, 63, 65] selectExpressions: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 12:double, DoubleColUnaryMinus(col 13:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 13:double) -> 14:double, FuncPowerDoubleToDouble(col 
21:double)(children: DoubleColDivideLongColumn(col 17:double, col 20:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 16:double)(children: DoubleColDivideLongColumn(col 15:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 15:double) -> 16:double) -> 17:double, IfExprNullCondExpr(col 18:boolean, null, col 19:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 18:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 19:bigint) -> 20:bigint) -> 21:double) -> 22:double, DoubleColMultiplyDoubleScalar(col 1:float, val -26.280000686645508) -> 23:float, DoubleColUnaryMinus(col 1:float) -> 24:float, DoubleColUnaryMinus(col 8:float) -> 25:float, DoubleColDivideDoubleScalar(col 27:double, val 10.175)(children: DoubleColUnaryMinus(col 26:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 26:double) -> 27:double) -> 28:double, FuncPowerDoubleToDouble(col 32:double)(children: DoubleColDivideLongColumn(col 31:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 30:double)(children: DoubleColDivideLongColumn(col 29:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 29:double) -> 30:double) -> 31:double) -> 32:double) -> 33:double, DoubleColUnaryMinus(col 36:double)(children: DoubleColDivideDoubleScalar(col 35:double, val 10.175)(children: DoubleColUnaryMinus(col 34:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 2:double) -> 34:double) -> 35:double) -> 36:double) -> 37:double, DoubleScalarModuloDoubleColumn(val -1.389, col 45:double)(children: FuncPowerDoubleToDouble(col 44:double)(children: DoubleColDivideLongColumn(col 40:double, col 43:bigint)(children: DoubleColSubtractDoubleColumn(col 5:double, col 39:double)(children: DoubleColDivideLongColumn(col 38:double, col 7:bigint)(children: DoubleColMultiplyDoubleColumn(col 6:double, col 6:double) -> 38:double) -> 39:double) -> 40:double, 
IfExprNullCondExpr(col 41:boolean, null, col 42:bigint)(children: LongColEqualLongScalar(col 7:bigint, val 1) -> 41:boolean, LongColSubtractLongScalar(col 7:bigint, val 1) -> 42:bigint) -> 43:bigint) -> 44:double) -> 45:double) -> 46:double, DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 47:double, DoubleColDivideLongColumn(col 50:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 49:double)(children: DoubleColDivideLongColumn(col 48:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 48:double) -> 49:double) -> 50:double) -> 51:double, DoubleColModuloDoubleScalar(col 55:double, val 10.175)(children: DoubleColDivideLongColumn(col 54:double, col 11:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 53:double)(children: DoubleColDivideLongColumn(col 52:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 52:double) -> 53:double) -> 54:double) -> 55:double) -> 56:double, DoubleColDivideLongColumn(col 59:double, col 62:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 58:double)(children: DoubleColDivideLongColumn(col 57:double, col 11:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 57:double) -> 58:double) -> 59:double, IfExprNullCondExpr(col 60:boolean, null, col 61:bigint)(children: LongColEqualLongScalar(col 11:bigint, val 1) -> 60:boolean, LongColSubtractLongScalar(col 11:bigint, val 1) -> 61:bigint) -> 62:bigint) -> 63:double, DoubleColUnaryMinus(col 64:double)(children: DoubleColSubtractDoubleColumn(col 1:double, col 2:double)(children: col 1:float) -> 64:double) -> 65:double - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col2 (type: string), _col1 (type: float), _col4 (type: 
double), _col0 (type: timestamp) sort order: ++++ @@ -202,7 +202,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 4:boolean, 12:double, 14:double, 22:double, 23:float, 8:float, 24:float, 25:float, 28:double, 33:double, 11:bigint, 37:double, 46:double, 47:double, 51:double, 56:double, 63:double, 65:double - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Reducer 3 Execution mode: vectorized, llap @@ -227,13 +227,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [3, 1, 0, 4, 2, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21] - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out index f7423ef0b2..c176b0a4e0 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_15.q.out @@ -86,7 +86,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -96,7 +96,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0))) predicate: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble 
(type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -105,7 +105,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 18, 21] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: @@ -117,10 +117,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9] keys: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, 
_col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -131,7 +131,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 7:double, 8:double, 9:bigint, 10:double, 11:double, 12:double, 13:bigint, 14:double, 15:double, 16:bigint - Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -162,15 +162,15 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 3072 Data size: 639332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: 
boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: 
tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Reducer 3 Execution mode: llap @@ -182,10 +182,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out index e9e8d1f0fe..01b98c3d03 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_16.q.out @@ -63,7 +63,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: 
((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2183514 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -82,7 +82,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -94,10 +94,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), 
_col2 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -108,7 +108,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:bigint, 4:double, 5:double, 6:double - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -156,7 +156,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1536 Data size: 227586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) 
(type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -165,13 +165,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 17, 26, 36, 6, 37, 39, 47] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double, IfExprNullCondExpr(col 13:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 25:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 23:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double) -> 19:double) -> 20:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 21:boolean, 
LongColSubtractLongScalar(col 3:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 24:double) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 34:double, col 35:double)(children: FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 29:double, IfExprNullCondExpr(col 30:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 33:double) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 37:double, DecimalColDivideDecimalScalar(col 38:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 38:decimal(19,0)) -> 39:decimal(28,6), FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 41:double)(children: DoubleColDivideLongColumn(col 40:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 40:double) -> 41:double) -> 42:double, IfExprNullCondExpr(col 43:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 43:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 46:double) -> 47:double - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out index 1b7dc7ea59..a4ab091079 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_17.q.out @@ -71,7 +71,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean) - Statistics: Num rows: 12288 Data size: 1647550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1522990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -81,7 +81,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float)), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)))) 
predicate: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean) - Statistics: Num rows: 6141 Data size: 823456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 761216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -90,7 +90,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 17, 20, 22, 24, 26, 29] selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 16:bigint, DoubleColUnaryMinus(col 5:double) -> 17:double, DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 18:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 18:double) -> 19:double) -> 20:double, DoubleColDivideDoubleColumn(col 5:double, col 21:double)(children: CastLongToDouble(col 2:int) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColUnaryMinus(col 5:double) -> 23:double) -> 24:double, DecimalScalarModuloDecimalColumn(val 
9763215.5639, col 25:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 25:decimal(19,0)) -> 26:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 28:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 5:double) -> 27:double) -> 28:double) -> 29:double - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -100,7 +100,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 6:string, 2:int, 8:timestamp, 5:double, 15:double, 16:bigint, 17:double, 20:double, 22:double, 24:double, 26:decimal(11,4), 29:double - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -142,13 +142,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1, 2, 3, 4, 5, 0, 6, 7, 8, 9, 10, 11, 12, 13] - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: 
VectorFileSinkOperator native: false - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_2.q.out b/ql/src/test/results/clientpositive/llap/vectorization_2.q.out index 33c34a1642..02d4fa5c1e 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_2.q.out @@ -69,7 +69,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2157324 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1908244 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -79,7 +79,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 14:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterLongColLessLongScalar(col 2:int, val 359))), FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 
9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375))) predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 636272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -88,7 +88,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 4, 3, 0, 5, 15, 18] selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double - Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 636272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out index 7d1cadc849..10977ca0e3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_3.q.out @@ -74,7 +74,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS 
decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 12288 Data size: 1276620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1027540 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -84,7 +84,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float), FilterDecimalColNotEqualDecimalScalar(col 14:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 15:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 15:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 16:double), FilterDecimalColGreaterEqualDecimalScalar(col 17:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 17:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2503 Data size: 
209380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -93,7 +93,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 4, 2, 18, 21, 22, 25, 4, 26, 27, 30] selectExpressions: CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 1:smallint) -> 19:double, CastLongToDouble(col 1:smallint) -> 20:double) -> 21:double, CastLongToDouble(col 0:tinyint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double, CastLongToDouble(col 0:tinyint) -> 24:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 26:double, CastLongToDouble(col 2:int) -> 27:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: CastLongToDouble(col 2:int) -> 28:double, CastLongToDouble(col 2:int) -> 29:double) -> 30:double - Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_5.q.out b/ql/src/test/results/clientpositive/llap/vectorization_5.q.out index 836e8bb52b..db823ff6c7 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_5.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_5.q.out @@ -63,7 +63,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2454862 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2330342 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %b%), SelectColumnIsNotNull(col 11:boolean)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterStringColLikeStringScalar(col 7:string, pattern a), SelectColumnIsNotNull(col 9:timestamp))) predicate: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean) - Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6879 Data size: 1304690 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, 
csmallint, cint @@ -81,7 +81,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6879 Data size: 1304690 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out index 51ae4a0eb2..8261276c9e 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_7.q.out @@ -77,7 +77,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -87,7 +87,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), 
FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0)) predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11033 Data size: 2711364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 2487724 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -96,7 +96,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 
3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -105,7 +105,7 @@ STAGE PLANS: keyColumns: 10:boolean, 3:bigint, 1:smallint, 0:tinyint, 8:timestamp, 6:string, 15:bigint, 16:int, 17:smallint, 18:tinyint, 20:int, 22:bigint, 23:int, 24:tinyint, 26:tinyint native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -147,19 +147,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic 
stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -331,7 +331,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -340,7 +340,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), 
FilterLongColNotEqualLongScalar(col 0:tinyint, val 0)) predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11033 Data size: 2711364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 2487724 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -349,7 +349,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: 
LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -357,7 +357,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -386,19 +386,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 9, 14] - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - 
Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_8.q.out b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out index eaa1f4dc1b..3c42e1d5d3 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_8.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_8.q.out @@ -73,7 +73,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2733998 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -83,7 +83,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 
10:boolean))) predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 680930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -92,7 +92,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 
26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ @@ -101,7 +101,7 @@ STAGE PLANS: keyColumns: 8:timestamp, 5:double, 10:boolean, 6:string, 4:float, 15:double, 16:double, 17:double, 19:float, 22:double, 23:double, 24:float, 25:float, 29:double native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -143,19 +143,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 
Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -314,7 +314,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2733998 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -323,7 +323,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean))) predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 680930 Basic stats: COMPLETE Column stats: COMPLETE Select 
Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -332,7 +332,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 
(type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ @@ -340,7 +340,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -369,19 +369,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 5, 11, 12, 13] - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out index e9e8d1f0fe..01b98c3d03 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_9.q.out @@ -63,7 +63,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2183514 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -82,7 +82,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -94,10 +94,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3] keys: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.0 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -108,7 +108,7 @@ STAGE PLANS: native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true valueColumns: 3:bigint, 4:double, 5:double, 6:double - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized, llap LLAP IO: all inputs @@ -156,7 +156,7 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1536 Data size: 227586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), 
_col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -165,13 +165,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 2, 7, 9, 3, 17, 26, 36, 6, 37, 39, 47] selectExpressions: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 7:double, DoubleColUnaryMinus(col 8:double)(children: DoubleColSubtractDoubleScalar(col 1:double, val 9763215.5639) -> 8:double) -> 9:double, FuncPowerDoubleToDouble(col 16:double)(children: DoubleColDivideLongColumn(col 12:double, col 15:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 11:double)(children: DoubleColDivideLongColumn(col 10:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 10:double) -> 11:double) -> 12:double, IfExprNullCondExpr(col 13:boolean, null, col 14:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 13:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 14:bigint) -> 15:bigint) -> 16:double) -> 17:double, DoubleColUnaryMinus(col 25:double)(children: FuncPowerDoubleToDouble(col 24:double)(children: DoubleColDivideLongColumn(col 20:double, col 23:bigint)(children: 
DoubleColSubtractDoubleColumn(col 4:double, col 19:double)(children: DoubleColDivideLongColumn(col 18:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 18:double) -> 19:double) -> 20:double, IfExprNullCondExpr(col 21:boolean, null, col 22:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 21:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 22:bigint) -> 23:bigint) -> 24:double) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 34:double, col 35:double)(children: FuncPowerDoubleToDouble(col 33:double)(children: DoubleColDivideLongColumn(col 29:double, col 32:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 27:double) -> 28:double) -> 29:double, IfExprNullCondExpr(col 30:boolean, null, col 31:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 30:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 31:bigint) -> 32:bigint) -> 33:double) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double, DoubleScalarDivideDoubleColumn(val 9763215.5639, col 1:double) -> 37:double, DecimalColDivideDecimalScalar(col 38:decimal(19,0), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 38:decimal(19,0)) -> 39:decimal(28,6), FuncPowerDoubleToDouble(col 46:double)(children: DoubleColDivideLongColumn(col 42:double, col 45:bigint)(children: DoubleColSubtractDoubleColumn(col 4:double, col 41:double)(children: DoubleColDivideLongColumn(col 40:double, col 3:bigint)(children: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 40:double) -> 41:double) -> 42:double, IfExprNullCondExpr(col 43:boolean, null, col 44:bigint)(children: LongColEqualLongScalar(col 3:bigint, val 1) -> 43:boolean, LongColSubtractLongScalar(col 3:bigint, val 1) -> 44:bigint) -> 45:bigint) -> 46:double) -> 47:double - Statistics: 
Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out index 8eb523d6f3..39aaa56c30 100644 --- a/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorization_short_regress.q.out @@ -97,7 +97,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -106,7 +106,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 1:smallint) -> 13:float), FilterDoubleColGreaterDoubleScalar(col 14:double, val -5.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), 
FilterDoubleColNotEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 15:double)), FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 16:decimal(22,3), val -1.389)(children: CastLongToDecimal(col 3:bigint) -> 16:decimal(22,3)), FilterStringGroupColNotEqualStringScalar(col 7:string, val a), FilterDecimalColNotEqualDecimalScalar(col 17:decimal(13,3), val 79.553)(children: CastLongToDecimal(col 2:int) -> 17:decimal(13,3)), FilterLongColNotEqualLongColumn(col 11:boolean, col 10:boolean)), FilterLongColEqualLongScalar(col 3:bigint, val 762), FilterStringGroupColEqualStringScalar(col 6:string, val a)) predicate: (((UDFToFloat(csmallint) < cfloat) and (UDFToDouble(ctimestamp2) > -5.0D) and (cdouble <> UDFToDouble(cint))) or ((CAST( cbigint AS decimal(22,3)) <= -1.389) and (cstring2 <> 'a') and (CAST( cint AS decimal(13,3)) <> 79.553) and (cboolean2 <> cboolean1)) or (cbigint = 762L) or (cstring1 = 'a')) (type: boolean) - Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5465 Data size: 1101870 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), csmallint (type: smallint), cfloat (type: float), ctinyint (type: tinyint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -115,7 +115,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 5, 1, 4, 0, 18, 21, 22, 25] selectExpressions: CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double, CastLongToDouble(col 1:smallint) -> 22:double, 
DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 1:smallint) -> 23:double, CastLongToDouble(col 1:smallint) -> 24:double) -> 25:double - Statistics: Num rows: 5465 Data size: 1157230 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5465 Data size: 1101870 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), min(_col4) Group By Vectorization: @@ -625,7 +625,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((ctimestamp1 = ctimestamp2) or ((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -634,7 +634,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterTimestampColEqualTimestampColumn(col 8:timestamp, col 9:timestamp), FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterLongColEqualLongScalar(col 11:boolean, val 1)), FilterExprAndExpr(children: FilterStringGroupColGreaterStringScalar(col 7:string, val a), SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 9:timestamp)), FilterDoubleColEqualDoubleScalar(col 4:float, val 762.0), FilterStringGroupColEqualStringScalar(col 6:string, val ss)) predicate: ((ctimestamp1 = ctimestamp2) or ((UDFToLong(csmallint) <= cbigint) and (cboolean2 = 1)) or ((cstring2 > 'a') and cboolean1 is not null and ctimestamp2 is not null) or (cfloat = 762.0) or (cstring1 = 'ss')) (type: boolean) - Statistics: Num rows: 11346 Data size: 
2856120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10571 Data size: 2446670 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9 @@ -643,7 +643,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 0, 1, 2, 5, 13, 16, 17, 20, 21] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, CastLongToDouble(col 1:smallint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 18:double, CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 21:double - Statistics: Num rows: 11346 Data size: 2856120 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10571 Data size: 2446670 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col0), count(), max(_col1), sum(_col8), sum(_col7), count(_col2), max(_col3), sum(_col9), sum(_col4), count(_col4), count(_col1), sum(_col1) Group By Vectorization: @@ -864,7 +864,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0D)) or (cfloat = 17.0) or ((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and (cstring1 >= 'ss'))) (type: boolean) - Statistics: Num rows: 12288 Data 
size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1889990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -873,7 +873,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 13:double, val 0.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double)), FilterDoubleColEqualDoubleScalar(col 4:float, val 17.0), FilterExprAndExpr(children: FilterTimestampColLessEqualTimestampColumn(col 9:timestamp, col 8:timestamp), FilterDoubleColNotEqualDoubleColumn(col 14:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 14:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss))) predicate: (((csmallint < UDFToShort(ctinyint)) and (UDFToDouble(ctimestamp1) >= 0.0D)) or (cfloat = 17.0) or ((ctimestamp2 <= ctimestamp1) and (UDFToDouble(cbigint) <> cdouble) and (cstring1 >= 'ss'))) (type: boolean) - Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2824 Data size: 434454 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), cbigint (type: bigint), cint (type: int), cfloat (type: float), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -882,7 +882,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 3, 2, 4, 15, 18, 19, 22] selectExpressions: CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 17:double) 
-> 18:double, CastLongToDouble(col 3:bigint) -> 19:double, DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: CastLongToDouble(col 3:bigint) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double - Statistics: Num rows: 2824 Data size: 491654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2824 Data size: 434454 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), max(_col1), sum(_col5), sum(_col4), count(_col2), sum(_col7), sum(_col6), count(_col1), max(_col3) Group By Vectorization: @@ -1111,7 +1111,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cboolean2 <> 1) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (UDFToInteger(ctinyint) <> -257)) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint)) or (cstring1 regexp 'a.*' and (cstring2 like '%ss%'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3056470 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2807390 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -1120,7 +1120,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 11:boolean, val 1), FilterDecimalColLessDecimalScalar(col 13:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 13:decimal(8,3)), FilterLongColNotEqualLongScalar(col 0:int, val -257)(children: col 0:tinyint)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 5:double, col 14:double)(children: CastLongToDouble(col 0:tinyint) -> 14:double), FilterDoubleColGreaterEqualDoubleColumn(col 4:float, col 15:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 15:float)), FilterExprAndExpr(children: FilterLongColLessLongColumn(col 2:bigint, col 
3:bigint)(children: col 2:int), FilterLongColGreaterLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint)), FilterExprAndExpr(children: FilterStringColRegExpStringScalar(col 6:string, pattern a.*), FilterStringColLikeStringScalar(col 7:string, pattern %ss%))) predicate: (((cboolean2 <> 1) and (CAST( csmallint AS decimal(8,3)) < 79.553) and (UDFToInteger(ctinyint) <> -257)) or ((cdouble > UDFToDouble(ctinyint)) and (cfloat >= UDFToFloat(cint))) or ((UDFToLong(cint) < cbigint) and (UDFToLong(ctinyint) > cbigint)) or (cstring1 regexp 'a.*' and (cstring2 like '%ss%'))) (type: boolean) - Statistics: Num rows: 9898 Data size: 2462086 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9898 Data size: 2261486 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cdouble (type: double), ctimestamp2 (type: timestamp), cstring1 (type: string), cboolean2 (type: boolean), ctinyint (type: tinyint), cfloat (type: float), ctimestamp1 (type: timestamp), csmallint (type: smallint), cbigint (type: bigint), (-3728L * cbigint) (type: bigint), (- cint) (type: int), (-863.257 - CAST( cint AS decimal(10,0))) (type: decimal(14,3)), (- csmallint) (type: smallint), (csmallint - (- csmallint)) (type: smallint), ((csmallint - (- csmallint)) + (- csmallint)) (type: smallint), (UDFToDouble(cint) / UDFToDouble(cint)) (type: double), ((-863.257 - CAST( cint AS decimal(10,0))) - -26.28) (type: decimal(15,3)), (- cfloat) (type: float), (cdouble * -89010.0D) (type: double), (UDFToDouble(ctinyint) / 988888.0D) (type: double), (- ctinyint) (type: tinyint), (79.553 / CAST( ctinyint AS decimal(3,0))) (type: decimal(9,7)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22 @@ -1129,7 +1129,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 5, 9, 6, 11, 0, 4, 8, 1, 3, 16, 17, 19, 20, 22, 26, 
29, 32, 33, 34, 36, 37, 39] selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 3:bigint) -> 16:bigint, LongColUnaryMinus(col 2:int) -> 17:int, DecimalScalarSubtractDecimalColumn(val -863.257, col 18:decimal(10,0))(children: CastLongToDecimal(col 2:int) -> 18:decimal(10,0)) -> 19:decimal(14,3), LongColUnaryMinus(col 1:smallint) -> 20:smallint, LongColSubtractLongColumn(col 1:smallint, col 21:smallint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:smallint, LongColAddLongColumn(col 24:smallint, col 25:smallint)(children: LongColSubtractLongColumn(col 1:smallint, col 23:smallint)(children: LongColUnaryMinus(col 1:smallint) -> 23:smallint) -> 24:smallint, LongColUnaryMinus(col 1:smallint) -> 25:smallint) -> 26:smallint, DoubleColDivideDoubleColumn(col 27:double, col 28:double)(children: CastLongToDouble(col 2:int) -> 27:double, CastLongToDouble(col 2:int) -> 28:double) -> 29:double, DecimalColSubtractDecimalScalar(col 31:decimal(14,3), val -26.28)(children: DecimalScalarSubtractDecimalColumn(val -863.257, col 30:decimal(10,0))(children: CastLongToDecimal(col 2:int) -> 30:decimal(10,0)) -> 31:decimal(14,3)) -> 32:decimal(15,3), DoubleColUnaryMinus(col 4:float) -> 33:float, DoubleColMultiplyDoubleScalar(col 5:double, val -89010.0) -> 34:double, DoubleColDivideDoubleScalar(col 35:double, val 988888.0)(children: CastLongToDouble(col 0:tinyint) -> 35:double) -> 36:double, LongColUnaryMinus(col 0:tinyint) -> 37:tinyint, DecimalScalarDivideDecimalColumn(val 79.553, col 38:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 38:decimal(3,0)) -> 39:decimal(9,7) - Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9898 Data size: 5432062 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: double), _col2 (type: timestamp), _col3 (type: string), _col4 (type: boolean), _col5 (type: tinyint), _col6 (type: float), 
_col7 (type: timestamp), _col8 (type: smallint), _col9 (type: bigint), _col10 (type: bigint), _col11 (type: int), _col12 (type: decimal(14,3)), _col13 (type: smallint), _col14 (type: smallint), _col15 (type: smallint), _col16 (type: double), _col17 (type: decimal(15,3)), _col18 (type: float), _col19 (type: double), _col20 (type: double), _col21 (type: tinyint), _col22 (type: decimal(9,7)) sort order: +++++++++++++++++++++++ @@ -1137,7 +1137,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9898 Data size: 5432062 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -1166,19 +1166,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22] - Statistics: Num rows: 9898 Data size: 5632662 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 9898 Data size: 5432062 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 27580 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 28540 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 27580 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1413,7 +1413,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cboolean1 < 0) or (cbigint = 359L) or ((UDFToInteger(ctinyint) < 197) and (UDFToLong(cint) = cbigint)) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -1422,7 +1422,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterLongColLessLongScalar(col 10:boolean, val 0), FilterLongColEqualLongScalar(col 3:bigint, val 359), FilterExprAndExpr(children: FilterLongColLessLongScalar(col 0:int, val 197)(children: col 0:tinyint), FilterLongColEqualLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %ss), FilterDoubleColLessEqualDoubleColumn(col 4:float, col 13:float)(children: CastLongToFloatViaLongToDouble(col 0:tinyint) -> 13:float))) predicate: ((cboolean1 < 0) or (cbigint = 359L) or ((UDFToInteger(ctinyint) < 197) and (UDFToLong(cint) = cbigint)) or ((cstring1 like '%ss') and (cfloat <= UDFToFloat(ctinyint)))) (type: boolean) - Statistics: Num rows: 8194 Data size: 1734900 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8194 Data size: 1651860 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cbigint (type: bigint), cstring1 (type: string), cboolean1 (type: boolean), cfloat (type: float), cdouble (type: double), ctimestamp2 (type: timestamp), csmallint (type: smallint), cstring2 (type: string), cboolean2 (type: boolean), (UDFToDouble(cint) / UDFToDouble(cbigint)) (type: double), (CAST( cbigint AS decimal(19,0)) % 79.553) 
(type: decimal(5,3)), (- (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (10.175 % cfloat) (type: float), (- cfloat) (type: float), (cfloat - (- cfloat)) (type: float), ((cfloat - (- cfloat)) % -6432.0) (type: float), (cdouble * UDFToDouble(csmallint)) (type: double), (- cdouble) (type: double), (- cbigint) (type: bigint), (UDFToDouble(cfloat) - (UDFToDouble(cint) / UDFToDouble(cbigint))) (type: double), (- csmallint) (type: smallint), (3569L % cbigint) (type: bigint), (359.0D - cdouble) (type: double), (- csmallint) (type: smallint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24 @@ -1431,7 +1431,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 3, 6, 10, 4, 5, 9, 1, 7, 11, 16, 18, 22, 23, 24, 26, 29, 31, 32, 33, 37, 38, 39, 40, 41] selectExpressions: DoubleColDivideDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 2:int) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DecimalColModuloDecimalScalar(col 17:decimal(19,0), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 17:decimal(19,0)) -> 18:decimal(5,3), DoubleColUnaryMinus(col 21:double)(children: DoubleColDivideDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 3:bigint) -> 20:double) -> 21:double) -> 22:double, DoubleScalarModuloDoubleColumn(val 10.175000190734863, col 4:float) -> 23:float, DoubleColUnaryMinus(col 4:float) -> 24:float, DoubleColSubtractDoubleColumn(col 4:float, col 25:float)(children: DoubleColUnaryMinus(col 4:float) -> 25:float) -> 26:float, DoubleColModuloDoubleScalar(col 28:float, val -6432.0)(children: DoubleColSubtractDoubleColumn(col 4:float, col 27:float)(children: DoubleColUnaryMinus(col 4:float) -> 27:float) -> 28:float) -> 29:float, DoubleColMultiplyDoubleColumn(col 5:double, col 
30:double)(children: CastLongToDouble(col 1:smallint) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 5:double) -> 32:double, LongColUnaryMinus(col 3:bigint) -> 33:bigint, DoubleColSubtractDoubleColumn(col 4:double, col 36:double)(children: col 4:float, DoubleColDivideDoubleColumn(col 34:double, col 35:double)(children: CastLongToDouble(col 2:int) -> 34:double, CastLongToDouble(col 3:bigint) -> 35:double) -> 36:double) -> 37:double, LongColUnaryMinus(col 1:smallint) -> 38:smallint, LongScalarModuloLongColumn(val 3569, col 3:bigint) -> 39:bigint, DoubleScalarSubtractDoubleColumn(val 359.0, col 5:double) -> 40:double, LongColUnaryMinus(col 1:smallint) -> 41:smallint - Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8194 Data size: 3266188 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: int), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean), _col4 (type: float), _col5 (type: double), _col6 (type: timestamp), _col7 (type: smallint), _col8 (type: string), _col9 (type: boolean), _col10 (type: double), _col11 (type: decimal(5,3)), _col12 (type: double), _col13 (type: float), _col14 (type: float), _col15 (type: float), _col16 (type: float), _col17 (type: double), _col18 (type: double), _col19 (type: bigint), _col20 (type: double), _col21 (type: smallint), _col22 (type: bigint), _col23 (type: double), _col24 (type: smallint) sort order: +++++++++++++++++++++++++ @@ -1439,7 +1439,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8194 Data size: 
3266188 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized, llap LLAP IO: all inputs @@ -1468,19 +1468,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 21] - Statistics: Num rows: 8194 Data size: 3349228 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 8194 Data size: 3266188 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 10320 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 25 Data size: 10520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 10320 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1664,7 +1664,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -1673,7 +1673,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: 
FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 3:bigint) -> 13:float), FilterDecimalColGreaterEqualDecimalScalar(col 14:decimal(7,2), val -26.28)(children: CastLongToDecimal(col 1:smallint) -> 14:decimal(7,2))), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 5:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 15:double), FilterStringGroupColGreaterEqualStringScalar(col 6:string, val ss), FilterDoubleColNotEqualDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 16:double)), FilterLongColEqualLongScalar(col 0:int, val -89010)(children: col 0:tinyint), FilterExprAndExpr(children: FilterDecimalColGreaterDecimalScalar(col 17:decimal(7,2), val -26.28)(children: CastLongToDecimal(col 1:smallint) -> 17:decimal(7,2)), FilterStringColLikeStringScalar(col 7:string, pattern ss))) predicate: (((UDFToFloat(cbigint) <= cfloat) and (CAST( csmallint AS decimal(7,2)) >= -26.28)) or ((cdouble <= UDFToDouble(cbigint)) and (cstring1 >= 'ss') and (UDFToDouble(cint) <> cdouble)) or (UDFToInteger(ctinyint) = -89010) or ((CAST( csmallint AS decimal(7,2)) > -26.28) and (cstring2 like 'ss'))) (type: boolean) - Statistics: Num rows: 10922 Data size: 2312410 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10922 Data size: 2201730 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int), cstring1 (type: string), cboolean2 (type: boolean), ctimestamp2 (type: timestamp), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), cboolean1 (type: boolean), (cint + UDFToInteger(csmallint)) (type: int), (cbigint - UDFToLong(ctinyint)) (type: bigint), (- cbigint) (type: bigint), (- cfloat) (type: float), ((cbigint - UDFToLong(ctinyint)) + cbigint) (type: bigint), (cdouble / cdouble) (type: double), (- cdouble) (type: double), (UDFToLong((cint + UDFToInteger(csmallint))) * (- 
cbigint)) (type: bigint), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (-1.389 / CAST( ctinyint AS decimal(3,0))) (type: decimal(8,7)), (UDFToDouble(cbigint) % cdouble) (type: double), (- csmallint) (type: smallint), (UDFToInteger(csmallint) + (cint + UDFToInteger(csmallint))) (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 @@ -1682,7 +1682,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [2, 6, 11, 9, 5, 4, 3, 1, 10, 18, 19, 20, 21, 23, 24, 25, 28, 31, 33, 35, 36, 38] selectExpressions: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 18:int, LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 19:bigint, LongColUnaryMinus(col 3:bigint) -> 20:bigint, DoubleColUnaryMinus(col 4:float) -> 21:float, LongColAddLongColumn(col 22:bigint, col 3:bigint)(children: LongColSubtractLongColumn(col 3:bigint, col 0:bigint)(children: col 0:tinyint) -> 22:bigint) -> 23:bigint, DoubleColDivideDoubleColumn(col 5:double, col 5:double) -> 24:double, DoubleColUnaryMinus(col 5:double) -> 25:double, LongColMultiplyLongColumn(col 26:bigint, col 27:bigint)(children: LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 26:int, LongColUnaryMinus(col 3:bigint) -> 27:bigint) -> 28:bigint, DoubleColAddDoubleColumn(col 29:double, col 30:double)(children: DoubleColUnaryMinus(col 5:double) -> 29:double, CastLongToDouble(col 3:bigint) -> 30:double) -> 31:double, DecimalScalarDivideDecimalColumn(val -1.389, col 32:decimal(3,0))(children: CastLongToDecimal(col 0:tinyint) -> 32:decimal(3,0)) -> 33:decimal(8,7), DoubleColModuloDoubleColumn(col 34:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 34:double) -> 35:double, LongColUnaryMinus(col 1:smallint) -> 36:smallint, LongColAddLongColumn(col 1:int, col 37:int)(children: col 1:smallint, 
LongColAddLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 37:int) -> 38:int - Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10922 Data size: 3483354 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col8 (type: boolean), _col1 (type: string), _col3 (type: timestamp), _col5 (type: float), _col6 (type: bigint), _col4 (type: double), _col0 (type: int), _col7 (type: smallint), _col9 (type: int), _col10 (type: bigint), _col11 (type: bigint), _col12 (type: float), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint), _col17 (type: double), _col18 (type: decimal(8,7)), _col19 (type: double), _col20 (type: smallint), _col21 (type: int) sort order: +++++++++++++++++++++ @@ -1690,7 +1690,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10922 Data size: 3483354 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col2 (type: boolean) Execution mode: vectorized, llap @@ -1720,19 +1720,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [6, 1, 21, 2, 5, 3, 4, 7, 0, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20] - Statistics: Num rows: 10922 Data size: 3594034 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10922 Data size: 3483354 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 75 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE 
Column stats: COMPLETE + Statistics: Num rows: 75 Data size: 24090 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 75 Data size: 24810 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 75 Data size: 24090 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -1973,7 +1973,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble >= UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2528254 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2403694 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -1982,7 +1982,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColGreaterEqualDoubleColumn(col 5:double, col 4:double)(children: col 4:float), FilterStringGroupColLessEqualStringScalar(col 7:string, val a)), FilterExprAndExpr(children: FilterDecimalColLessEqualDecimalScalar(col 13:decimal(13,3), val -1.389)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)), FilterLongColLessLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), FilterLongColLessLongScalar(col 1:int, val -6432)(children: col 1:smallint)), FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern ss%), FilterDecimalColLessDecimalScalar(col 14:decimal(22,3), val 10.175)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)))) predicate: (((cdouble >= 
UDFToDouble(cfloat)) and (cstring2 <= 'a')) or ((CAST( cint AS decimal(13,3)) <= -1.389) and (csmallint < UDFToShort(ctinyint)) and (UDFToInteger(csmallint) < -6432)) or ((cstring1 like 'ss%') and (CAST( cbigint AS decimal(22,3)) < 10.175))) (type: boolean) - Statistics: Num rows: 3868 Data size: 795962 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3868 Data size: 756762 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cstring2 (type: string), cdouble (type: double), cfloat (type: float), cbigint (type: bigint), csmallint (type: smallint), (UDFToDouble(cbigint) / 3569.0D) (type: double), (-257 - UDFToInteger(csmallint)) (type: int), (-6432.0 * cfloat) (type: float), (- cdouble) (type: double), (cdouble * 10.175D) (type: double), (UDFToDouble((-6432.0 * cfloat)) / UDFToDouble(cfloat)) (type: double), (- cfloat) (type: float), (cint % UDFToInteger(csmallint)) (type: int), (- cdouble) (type: double), (cdouble * (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15 @@ -1991,7 +1991,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 7, 5, 4, 3, 1, 16, 17, 18, 19, 20, 22, 23, 24, 25, 27] selectExpressions: DoubleColDivideDoubleScalar(col 15:double, val 3569.0)(children: CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, LongScalarSubtractLongColumn(val -257, col 1:int)(children: col 1:smallint) -> 17:int, DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4:float) -> 18:float, DoubleColUnaryMinus(col 5:double) -> 19:double, DoubleColMultiplyDoubleScalar(col 5:double, val 10.175) -> 20:double, DoubleColDivideDoubleColumn(col 21:double, col 4:double)(children: DoubleScalarMultiplyDoubleColumn(val -6432.0, col 4:float) -> 21:float, col 4:float) -> 22:double, DoubleColUnaryMinus(col 4:float) -> 23:float, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 
1:smallint) -> 24:int, DoubleColUnaryMinus(col 5:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 5:double, col 26:double)(children: DoubleColUnaryMinus(col 5:double) -> 26:double) -> 27:double - Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3868 Data size: 709644 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: smallint), _col1 (type: string), _col2 (type: double), _col3 (type: float), _col4 (type: bigint), _col6 (type: double), _col7 (type: int), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: float), _col13 (type: int), _col14 (type: double), _col15 (type: double) sort order: +++++++++++++++ @@ -1999,7 +1999,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3868 Data size: 709644 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 value expressions: _col0 (type: timestamp) Execution mode: vectorized, llap @@ -2029,19 +2029,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [15, 1, 2, 3, 4, 0, 5, 6, 7, 8, 9, 10, 11, 12, 8, 14] - Statistics: Num rows: 3868 Data size: 748844 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3868 Data size: 709644 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 45 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45 Data size: 8480 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 45 Data size: 8880 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 45 Data size: 8480 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -2834,7 +2834,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((((UDFToInteger(ctinyint) <> -257) and cstring1 regexp '.*ss' and (UDFToDouble(ctimestamp1) > -3.0D) and cboolean2 is not null) or (UDFToDouble(ctimestamp2) = -5.0D) or ((UDFToDouble(ctimestamp1) < 0.0D) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint)))) and (UDFToDouble(ctimestamp1) <> 0.0D)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -2843,7 +2843,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterLongColNotEqualLongScalar(col 0:int, val -257)(children: col 0:tinyint), FilterStringColRegExpStringScalar(col 6:string, pattern .*ss), FilterDoubleColGreaterDoubleScalar(col 13:double, val -3.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), SelectColumnIsNotNull(col 11:boolean)), FilterDoubleColEqualDoubleScalar(col 14:double, val -5.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 15:double, val 0.0)(children: CastTimestampToDouble(col 8:timestamp) -> 15:double), FilterStringColLikeStringScalar(col 7:string, pattern %b%)), FilterDoubleColEqualDoubleColumn(col 5:double, col 
16:double)(children: CastLongToDouble(col 2:int) -> 16:double), FilterExprAndExpr(children: SelectColumnIsNull(col 10:boolean), FilterDoubleColLessDoubleColumn(col 4:float, col 17:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 17:float))), FilterDoubleColNotEqualDoubleScalar(col 18:double, val 0.0)(children: CastTimestampToDouble(col 8:timestamp) -> 18:double)) predicate: ((((UDFToInteger(ctinyint) <> -257) and cstring1 regexp '.*ss' and (UDFToDouble(ctimestamp1) > -3.0D) and cboolean2 is not null) or (UDFToDouble(ctimestamp2) = -5.0D) or ((UDFToDouble(ctimestamp1) < 0.0D) and (cstring2 like '%b%')) or (cdouble = UDFToDouble(cint)) or (cboolean1 is null and (cfloat < UDFToFloat(cint)))) and (UDFToDouble(ctimestamp1) <> 0.0D)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cstring1 (type: string), cint (type: int), csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cdouble (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -2852,7 +2852,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 6, 2, 1, 0, 4, 5, 19, 22, 23, 26, 4, 27, 28, 31] selectExpressions: CastLongToDouble(col 2:int) -> 19:double, DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: CastLongToDouble(col 2:int) -> 20:double, CastLongToDouble(col 
2:int) -> 21:double) -> 22:double, CastLongToDouble(col 1:smallint) -> 23:double, DoubleColMultiplyDoubleColumn(col 24:double, col 25:double)(children: CastLongToDouble(col 1:smallint) -> 24:double, CastLongToDouble(col 1:smallint) -> 25:double) -> 26:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 27:double, CastLongToDouble(col 0:tinyint) -> 28:double, DoubleColMultiplyDoubleColumn(col 29:double, col 30:double)(children: CastLongToDouble(col 0:tinyint) -> 29:double, CastLongToDouble(col 0:tinyint) -> 30:double) -> 31:double - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col2), sum(_col3), count(_col3), count(), min(_col4), sum(_col10), sum(_col9), sum(_col12), sum(_col11), count(_col5), sum(_col2), sum(_col5), min(_col6), sum(_col14), sum(_col13), count(_col4) Group By Vectorization: @@ -2864,10 +2864,10 @@ STAGE PLANS: vectorProcessingMode: HASH projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17] keys: _col0 (type: timestamp), _col1 (type: string) - minReductionHashAggr: 0.99 + minReductionHashAggr: 0.5133463 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 6144 Data size: 1537192 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5980 Data size: 1435604 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string) sort order: ++ @@ -2876,7 +2876,7 @@ STAGE PLANS: className: VectorReduceSinkMultiKeyOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, 
spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 6144 Data size: 1537192 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5980 Data size: 1435604 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col2 (type: double), _col3 (type: double), _col4 (type: bigint), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: tinyint), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: bigint) Execution mode: vectorized, llap LLAP IO: all inputs @@ -2911,7 +2911,7 @@ STAGE PLANS: keys: KEY._col0 (type: timestamp), KEY._col1 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19 - Statistics: Num rows: 3072 Data size: 768596 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5980 Data size: 1435604 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: timestamp), _col1 (type: string), power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) (type: double), (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D) (type: double), (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), (UDFToDouble(_col5) / _col6) (type: double), (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), (-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), _col7 (type: bigint), (- _col7) (type: bigint), ((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) (type: double), _col8 
(type: tinyint), (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7))) (type: double), (- (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D)) (type: double), ((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) (type: double), (((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) (type: double), (- (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) (type: double), (UDFToDouble((- _col7)) / power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (10.175D / (UDFToDouble(_col5) / _col6)) (type: double), (UDFToDouble(_col14) / _col4) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END) (type: double), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) - (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) (type: double), (- (- (power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5) * 10.175D))) (type: double), (_col15 / _col13) (type: double), (((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * 
UDFToDouble((- _col7)))) - (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) * 10.175D) (type: double), (10.175D % (10.175D / (UDFToDouble(_col5) / _col6))) (type: double), (- _col8) (type: tinyint), _col16 (type: double), ((_col9 - ((_col10 * _col10) / _col6)) / _col6) (type: double), (- ((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)))) (type: double), ((- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) % (_col15 / _col13)) (type: double), (-26.28 / CAST( (- _col8) AS decimal(3,0))) (type: decimal(8,6)), power(((_col17 - ((_col18 * _col18) / _col19)) / _col19), 0.5) (type: double), _col14 (type: bigint), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) / ((_col11 - ((_col12 * _col12) / _col13)) / _col13)) (type: double), (- (- _col7)) (type: bigint), _col7 (type: bigint), ((((_col9 - ((_col10 * _col10) / _col6)) / CASE WHEN ((_col6 = 1L)) THEN (null) ELSE ((_col6 - 1)) END) + (((-26.28D - power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5)) * (- power(((_col2 - ((_col3 * _col3) / _col4)) / _col4), 0.5))) * UDFToDouble((- _col7)))) % -26.28D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38 @@ -2920,7 +2920,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [0, 1, 24, 30, 36, 38, 44, 50, 7, 51, 64, 8, 80, 87, 94, 118, 125, 133, 137, 140, 142, 149, 
190, 198, 199, 241, 245, 246, 16, 250, 264, 272, 275, 280, 14, 309, 311, 7, 336] selectExpressions: FuncPowerDoubleToDouble(col 23:double)(children: DoubleColDivideLongColumn(col 22:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 21:double)(children: DoubleColDivideLongColumn(col 20:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 20:double) -> 21:double) -> 22:double) -> 23:double) -> 24:double, DoubleColMultiplyDoubleScalar(col 29:double, val 10.175)(children: FuncPowerDoubleToDouble(col 28:double)(children: DoubleColDivideLongColumn(col 27:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 26:double)(children: DoubleColDivideLongColumn(col 25:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 25:double) -> 26:double) -> 27:double) -> 28:double) -> 29:double) -> 30:double, DoubleColUnaryMinus(col 35:double)(children: FuncPowerDoubleToDouble(col 34:double)(children: DoubleColDivideLongColumn(col 33:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 32:double)(children: DoubleColDivideLongColumn(col 31:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 31:double) -> 32:double) -> 33:double) -> 34:double) -> 35:double) -> 36:double, DoubleColDivideLongColumn(col 37:double, col 6:bigint)(children: CastLongToDouble(col 5:bigint) -> 37:double) -> 38:double, DoubleColUnaryMinus(col 43:double)(children: FuncPowerDoubleToDouble(col 42:double)(children: DoubleColDivideLongColumn(col 41:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 40:double)(children: DoubleColDivideLongColumn(col 39:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 39:double) -> 40:double) -> 41:double) -> 42:double) -> 43:double) -> 44:double, DoubleScalarSubtractDoubleColumn(val -26.28, col 49:double)(children: 
FuncPowerDoubleToDouble(col 48:double)(children: DoubleColDivideLongColumn(col 47:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 46:double)(children: DoubleColDivideLongColumn(col 45:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 45:double) -> 46:double) -> 47:double) -> 48:double) -> 49:double) -> 50:double, LongColUnaryMinus(col 7:bigint) -> 51:bigint, DoubleColMultiplyDoubleColumn(col 57:double, col 63:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 56:double)(children: FuncPowerDoubleToDouble(col 55:double)(children: DoubleColDivideLongColumn(col 54:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 53:double)(children: DoubleColDivideLongColumn(col 52:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 52:double) -> 53:double) -> 54:double) -> 55:double) -> 56:double) -> 57:double, DoubleColUnaryMinus(col 62:double)(children: FuncPowerDoubleToDouble(col 61:double)(children: DoubleColDivideLongColumn(col 60:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 59:double)(children: DoubleColDivideLongColumn(col 58:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 58:double) -> 59:double) -> 60:double) -> 61:double) -> 62:double) -> 63:double) -> 64:double, DoubleColMultiplyDoubleColumn(col 77:double, col 79:double)(children: DoubleColMultiplyDoubleColumn(col 70:double, col 76:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 69:double)(children: FuncPowerDoubleToDouble(col 68:double)(children: DoubleColDivideLongColumn(col 67:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 66:double)(children: DoubleColDivideLongColumn(col 65:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 65:double) -> 66:double) -> 67:double) -> 68:double) -> 
69:double) -> 70:double, DoubleColUnaryMinus(col 75:double)(children: FuncPowerDoubleToDouble(col 74:double)(children: DoubleColDivideLongColumn(col 73:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 72:double)(children: DoubleColDivideLongColumn(col 71:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 71:double) -> 72:double) -> 73:double) -> 74:double) -> 75:double) -> 76:double) -> 77:double, CastLongToDouble(col 78:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 78:bigint) -> 79:double) -> 80:double, DoubleColUnaryMinus(col 86:double)(children: DoubleColMultiplyDoubleScalar(col 85:double, val 10.175)(children: FuncPowerDoubleToDouble(col 84:double)(children: DoubleColDivideLongColumn(col 83:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 82:double)(children: DoubleColDivideLongColumn(col 81:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 81:double) -> 82:double) -> 83:double) -> 84:double) -> 85:double) -> 86:double) -> 87:double, DoubleColDivideLongColumn(col 90:double, col 93:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 89:double)(children: DoubleColDivideLongColumn(col 88:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 88:double) -> 89:double) -> 90:double, IfExprNullCondExpr(col 91:boolean, null, col 92:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 91:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 92:bigint) -> 93:bigint) -> 94:double, DoubleColAddDoubleColumn(col 101:double, col 117:double)(children: DoubleColDivideLongColumn(col 97:double, col 100:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 96:double)(children: DoubleColDivideLongColumn(col 95:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 95:double) -> 96:double) -> 97:double, 
IfExprNullCondExpr(col 98:boolean, null, col 99:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 98:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 99:bigint) -> 100:bigint) -> 101:double, DoubleColMultiplyDoubleColumn(col 114:double, col 116:double)(children: DoubleColMultiplyDoubleColumn(col 107:double, col 113:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 106:double)(children: FuncPowerDoubleToDouble(col 105:double)(children: DoubleColDivideLongColumn(col 104:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 103:double)(children: DoubleColDivideLongColumn(col 102:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 102:double) -> 103:double) -> 104:double) -> 105:double) -> 106:double) -> 107:double, DoubleColUnaryMinus(col 112:double)(children: FuncPowerDoubleToDouble(col 111:double)(children: DoubleColDivideLongColumn(col 110:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 109:double)(children: DoubleColDivideLongColumn(col 108:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 108:double) -> 109:double) -> 110:double) -> 111:double) -> 112:double) -> 113:double) -> 114:double, CastLongToDouble(col 115:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 115:bigint) -> 116:double) -> 117:double) -> 118:double, DoubleColUnaryMinus(col 124:double)(children: DoubleColUnaryMinus(col 123:double)(children: FuncPowerDoubleToDouble(col 122:double)(children: DoubleColDivideLongColumn(col 121:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 120:double)(children: DoubleColDivideLongColumn(col 119:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 119:double) -> 120:double) -> 121:double) -> 122:double) -> 123:double) -> 124:double) -> 125:double, DoubleColDivideDoubleColumn(col 127:double, col 
132:double)(children: CastLongToDouble(col 126:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 126:bigint) -> 127:double, FuncPowerDoubleToDouble(col 131:double)(children: DoubleColDivideLongColumn(col 130:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 129:double)(children: DoubleColDivideLongColumn(col 128:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 128:double) -> 129:double) -> 130:double) -> 131:double) -> 132:double) -> 133:double, DoubleColDivideLongColumn(col 136:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 135:double)(children: DoubleColDivideLongColumn(col 134:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 134:double) -> 135:double) -> 136:double) -> 137:double, DoubleScalarDivideDoubleColumn(val 10.175, col 139:double)(children: DoubleColDivideLongColumn(col 138:double, col 6:bigint)(children: CastLongToDouble(col 5:bigint) -> 138:double) -> 139:double) -> 140:double, DoubleColDivideLongColumn(col 141:double, col 4:bigint)(children: CastLongToDouble(col 14:bigint) -> 141:double) -> 142:double, DoubleColDivideLongColumn(col 145:double, col 148:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 144:double)(children: DoubleColDivideLongColumn(col 143:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 143:double) -> 144:double) -> 145:double, IfExprNullCondExpr(col 146:boolean, null, col 147:bigint)(children: LongColEqualLongScalar(col 13:bigint, val 1) -> 146:boolean, LongColSubtractLongScalar(col 13:bigint, val 1) -> 147:bigint) -> 148:bigint) -> 149:double, DoubleColSubtractDoubleColumn(col 173:double, col 189:double)(children: DoubleColAddDoubleColumn(col 156:double, col 172:double)(children: DoubleColDivideLongColumn(col 152:double, col 155:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 
151:double)(children: DoubleColDivideLongColumn(col 150:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 150:double) -> 151:double) -> 152:double, IfExprNullCondExpr(col 153:boolean, null, col 154:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 153:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 154:bigint) -> 155:bigint) -> 156:double, DoubleColMultiplyDoubleColumn(col 169:double, col 171:double)(children: DoubleColMultiplyDoubleColumn(col 162:double, col 168:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 161:double)(children: FuncPowerDoubleToDouble(col 160:double)(children: DoubleColDivideLongColumn(col 159:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 158:double)(children: DoubleColDivideLongColumn(col 157:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 157:double) -> 158:double) -> 159:double) -> 160:double) -> 161:double) -> 162:double, DoubleColUnaryMinus(col 167:double)(children: FuncPowerDoubleToDouble(col 166:double)(children: DoubleColDivideLongColumn(col 165:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 164:double)(children: DoubleColDivideLongColumn(col 163:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 163:double) -> 164:double) -> 165:double) -> 166:double) -> 167:double) -> 168:double) -> 169:double, CastLongToDouble(col 170:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 170:bigint) -> 171:double) -> 172:double) -> 173:double, DoubleColMultiplyDoubleColumn(col 186:double, col 188:double)(children: DoubleColMultiplyDoubleColumn(col 179:double, col 185:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 178:double)(children: FuncPowerDoubleToDouble(col 177:double)(children: DoubleColDivideLongColumn(col 176:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, 
col 175:double)(children: DoubleColDivideLongColumn(col 174:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 174:double) -> 175:double) -> 176:double) -> 177:double) -> 178:double) -> 179:double, DoubleColUnaryMinus(col 184:double)(children: FuncPowerDoubleToDouble(col 183:double)(children: DoubleColDivideLongColumn(col 182:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 181:double)(children: DoubleColDivideLongColumn(col 180:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 180:double) -> 181:double) -> 182:double) -> 183:double) -> 184:double) -> 185:double) -> 186:double, CastLongToDouble(col 187:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 187:bigint) -> 188:double) -> 189:double) -> 190:double, DoubleColUnaryMinus(col 197:double)(children: DoubleColUnaryMinus(col 196:double)(children: DoubleColMultiplyDoubleScalar(col 195:double, val 10.175)(children: FuncPowerDoubleToDouble(col 194:double)(children: DoubleColDivideLongColumn(col 193:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 192:double)(children: DoubleColDivideLongColumn(col 191:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 191:double) -> 192:double) -> 193:double) -> 194:double) -> 195:double) -> 196:double) -> 197:double) -> 198:double, DoubleColDivideLongColumn(col 15:double, col 13:bigint) -> 199:double, DoubleColMultiplyDoubleScalar(col 240:double, val 10.175)(children: DoubleColSubtractDoubleColumn(col 223:double, col 239:double)(children: DoubleColAddDoubleColumn(col 206:double, col 222:double)(children: DoubleColDivideLongColumn(col 202:double, col 205:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 201:double)(children: DoubleColDivideLongColumn(col 200:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 200:double) -> 201:double) 
-> 202:double, IfExprNullCondExpr(col 203:boolean, null, col 204:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 203:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 204:bigint) -> 205:bigint) -> 206:double, DoubleColMultiplyDoubleColumn(col 219:double, col 221:double)(children: DoubleColMultiplyDoubleColumn(col 212:double, col 218:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 211:double)(children: FuncPowerDoubleToDouble(col 210:double)(children: DoubleColDivideLongColumn(col 209:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 208:double)(children: DoubleColDivideLongColumn(col 207:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 207:double) -> 208:double) -> 209:double) -> 210:double) -> 211:double) -> 212:double, DoubleColUnaryMinus(col 217:double)(children: FuncPowerDoubleToDouble(col 216:double)(children: DoubleColDivideLongColumn(col 215:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 214:double)(children: DoubleColDivideLongColumn(col 213:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 213:double) -> 214:double) -> 215:double) -> 216:double) -> 217:double) -> 218:double) -> 219:double, CastLongToDouble(col 220:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 220:bigint) -> 221:double) -> 222:double) -> 223:double, DoubleColMultiplyDoubleColumn(col 236:double, col 238:double)(children: DoubleColMultiplyDoubleColumn(col 229:double, col 235:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 228:double)(children: FuncPowerDoubleToDouble(col 227:double)(children: DoubleColDivideLongColumn(col 226:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 225:double)(children: DoubleColDivideLongColumn(col 224:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 224:double) -> 
225:double) -> 226:double) -> 227:double) -> 228:double) -> 229:double, DoubleColUnaryMinus(col 234:double)(children: FuncPowerDoubleToDouble(col 233:double)(children: DoubleColDivideLongColumn(col 232:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 231:double)(children: DoubleColDivideLongColumn(col 230:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 230:double) -> 231:double) -> 232:double) -> 233:double) -> 234:double) -> 235:double) -> 236:double, CastLongToDouble(col 237:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 237:bigint) -> 238:double) -> 239:double) -> 240:double) -> 241:double, DoubleScalarModuloDoubleColumn(val 10.175, col 244:double)(children: DoubleScalarDivideDoubleColumn(val 10.175, col 243:double)(children: DoubleColDivideLongColumn(col 242:double, col 6:bigint)(children: CastLongToDouble(col 5:bigint) -> 242:double) -> 243:double) -> 244:double) -> 245:double, LongColUnaryMinus(col 8:tinyint) -> 246:tinyint, DoubleColDivideLongColumn(col 249:double, col 6:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 248:double)(children: DoubleColDivideLongColumn(col 247:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 247:double) -> 248:double) -> 249:double) -> 250:double, DoubleColUnaryMinus(col 263:double)(children: DoubleColMultiplyDoubleColumn(col 256:double, col 262:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 255:double)(children: FuncPowerDoubleToDouble(col 254:double)(children: DoubleColDivideLongColumn(col 253:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 252:double)(children: DoubleColDivideLongColumn(col 251:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 251:double) -> 252:double) -> 253:double) -> 254:double) -> 255:double) -> 256:double, DoubleColUnaryMinus(col 261:double)(children: 
FuncPowerDoubleToDouble(col 260:double)(children: DoubleColDivideLongColumn(col 259:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 258:double)(children: DoubleColDivideLongColumn(col 257:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 257:double) -> 258:double) -> 259:double) -> 260:double) -> 261:double) -> 262:double) -> 263:double) -> 264:double, DoubleColModuloDoubleColumn(col 270:double, col 271:double)(children: DoubleColUnaryMinus(col 269:double)(children: FuncPowerDoubleToDouble(col 268:double)(children: DoubleColDivideLongColumn(col 267:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 266:double)(children: DoubleColDivideLongColumn(col 265:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 265:double) -> 266:double) -> 267:double) -> 268:double) -> 269:double) -> 270:double, DoubleColDivideLongColumn(col 15:double, col 13:bigint) -> 271:double) -> 272:double, DecimalScalarDivideDecimalColumn(val -26.28, col 274:decimal(3,0))(children: CastLongToDecimal(col 273:tinyint)(children: LongColUnaryMinus(col 8:tinyint) -> 273:tinyint) -> 274:decimal(3,0)) -> 275:decimal(8,6), FuncPowerDoubleToDouble(col 279:double)(children: DoubleColDivideLongColumn(col 278:double, col 19:bigint)(children: DoubleColSubtractDoubleColumn(col 17:double, col 277:double)(children: DoubleColDivideLongColumn(col 276:double, col 19:bigint)(children: DoubleColMultiplyDoubleColumn(col 18:double, col 18:double) -> 276:double) -> 277:double) -> 278:double) -> 279:double) -> 280:double, DoubleColDivideDoubleColumn(col 304:double, col 308:double)(children: DoubleColAddDoubleColumn(col 287:double, col 303:double)(children: DoubleColDivideLongColumn(col 283:double, col 286:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 282:double)(children: DoubleColDivideLongColumn(col 281:double, col 6:bigint)(children: 
DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 281:double) -> 282:double) -> 283:double, IfExprNullCondExpr(col 284:boolean, null, col 285:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 284:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 285:bigint) -> 286:bigint) -> 287:double, DoubleColMultiplyDoubleColumn(col 300:double, col 302:double)(children: DoubleColMultiplyDoubleColumn(col 293:double, col 299:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 292:double)(children: FuncPowerDoubleToDouble(col 291:double)(children: DoubleColDivideLongColumn(col 290:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 289:double)(children: DoubleColDivideLongColumn(col 288:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 288:double) -> 289:double) -> 290:double) -> 291:double) -> 292:double) -> 293:double, DoubleColUnaryMinus(col 298:double)(children: FuncPowerDoubleToDouble(col 297:double)(children: DoubleColDivideLongColumn(col 296:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 295:double)(children: DoubleColDivideLongColumn(col 294:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 294:double) -> 295:double) -> 296:double) -> 297:double) -> 298:double) -> 299:double) -> 300:double, CastLongToDouble(col 301:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 301:bigint) -> 302:double) -> 303:double) -> 304:double, DoubleColDivideLongColumn(col 307:double, col 13:bigint)(children: DoubleColSubtractDoubleColumn(col 11:double, col 306:double)(children: DoubleColDivideLongColumn(col 305:double, col 13:bigint)(children: DoubleColMultiplyDoubleColumn(col 12:double, col 12:double) -> 305:double) -> 306:double) -> 307:double) -> 308:double) -> 309:double, LongColUnaryMinus(col 310:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 310:bigint) -> 311:bigint, 
DoubleColModuloDoubleScalar(col 335:double, val -26.28)(children: DoubleColAddDoubleColumn(col 318:double, col 334:double)(children: DoubleColDivideLongColumn(col 314:double, col 317:bigint)(children: DoubleColSubtractDoubleColumn(col 9:double, col 313:double)(children: DoubleColDivideLongColumn(col 312:double, col 6:bigint)(children: DoubleColMultiplyDoubleColumn(col 10:double, col 10:double) -> 312:double) -> 313:double) -> 314:double, IfExprNullCondExpr(col 315:boolean, null, col 316:bigint)(children: LongColEqualLongScalar(col 6:bigint, val 1) -> 315:boolean, LongColSubtractLongScalar(col 6:bigint, val 1) -> 316:bigint) -> 317:bigint) -> 318:double, DoubleColMultiplyDoubleColumn(col 331:double, col 333:double)(children: DoubleColMultiplyDoubleColumn(col 324:double, col 330:double)(children: DoubleScalarSubtractDoubleColumn(val -26.28, col 323:double)(children: FuncPowerDoubleToDouble(col 322:double)(children: DoubleColDivideLongColumn(col 321:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 320:double)(children: DoubleColDivideLongColumn(col 319:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 319:double) -> 320:double) -> 321:double) -> 322:double) -> 323:double) -> 324:double, DoubleColUnaryMinus(col 329:double)(children: FuncPowerDoubleToDouble(col 328:double)(children: DoubleColDivideLongColumn(col 327:double, col 4:bigint)(children: DoubleColSubtractDoubleColumn(col 2:double, col 326:double)(children: DoubleColDivideLongColumn(col 325:double, col 4:bigint)(children: DoubleColMultiplyDoubleColumn(col 3:double, col 3:double) -> 325:double) -> 326:double) -> 327:double) -> 328:double) -> 329:double) -> 330:double) -> 331:double, CastLongToDouble(col 332:bigint)(children: LongColUnaryMinus(col 7:bigint) -> 332:bigint) -> 333:double) -> 334:double) -> 335:double) -> 336:double - Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 5980 Data size: 2942564 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: string), _col2 (type: double), _col3 (type: double), _col4 (type: double), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double), _col11 (type: tinyint), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double), _col22 (type: double), _col23 (type: double), _col24 (type: double), _col25 (type: double), _col26 (type: double), _col27 (type: tinyint), _col28 (type: double), _col29 (type: double), _col30 (type: double), _col31 (type: double), _col32 (type: decimal(8,6)), _col33 (type: double), _col34 (type: bigint), _col35 (type: double), _col36 (type: bigint), _col37 (type: bigint), _col38 (type: double) sort order: +++++++++++++++++++++++++++++++++++++++ @@ -2928,7 +2928,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 5980 Data size: 2942564 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Reducer 3 Execution mode: vectorized, llap @@ -2946,19 +2946,19 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 4, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 8, 38] - Statistics: Num rows: 3072 Data size: 1542740 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 5980 Data size: 2942564 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 50 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 24692 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 50 Data size: 25172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 50 Data size: 24692 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -3242,7 +3242,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and (cboolean1 >= 1) and cstring1 is not null) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2601650 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2477130 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -3251,7 +3251,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 1:smallint) -> 13:double), FilterLongColEqualLongColumn(col 11:boolean, col 10:boolean), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(22,3), val -863.257)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3))), FilterExprAndExpr(children: 
FilterLongColGreaterEqualLongScalar(col 2:int, val -257), FilterLongColGreaterEqualLongScalar(col 10:boolean, val 1), SelectColumnIsNotNull(col 6:string)), FilterStringColRegExpStringScalar(col 7:string, pattern b), FilterExprAndExpr(children: FilterLongColGreaterEqualLongColumn(col 1:smallint, col 0:smallint)(children: col 0:tinyint), SelectColumnIsNull(col 9:timestamp)))) predicate: (cboolean1 is not null and (((cdouble < UDFToDouble(csmallint)) and (cboolean2 = cboolean1) and (CAST( cbigint AS decimal(22,3)) <= -863.257)) or ((cint >= -257) and (cboolean1 >= 1) and cstring1 is not null) or cstring2 regexp 'b' or ((csmallint >= UDFToShort(ctinyint)) and ctimestamp2 is null))) (type: boolean) - Statistics: Num rows: 6237 Data size: 1320590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7012 Data size: 1413600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cfloat (type: float), cbigint (type: bigint), cint (type: int), cdouble (type: double), ctinyint (type: tinyint), csmallint (type: smallint), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -3260,7 +3260,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 4, 3, 2, 5, 0, 1, 15, 18, 19, 22, 23, 26, 27, 30] selectExpressions: CastLongToDouble(col 2:int) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 2:int) -> 16:double, CastLongToDouble(col 2:int) -> 17:double) -> 18:double, CastLongToDouble(col 3:bigint) 
-> 19:double, DoubleColMultiplyDoubleColumn(col 20:double, col 21:double)(children: CastLongToDouble(col 3:bigint) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, CastLongToDouble(col 0:tinyint) -> 23:double, DoubleColMultiplyDoubleColumn(col 24:double, col 25:double)(children: CastLongToDouble(col 0:tinyint) -> 24:double, CastLongToDouble(col 0:tinyint) -> 25:double) -> 26:double, CastLongToDouble(col 1:smallint) -> 27:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 28:double, CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double - Statistics: Num rows: 6237 Data size: 1320590 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7012 Data size: 1413600 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col2), sum(_col8), sum(_col7), count(_col3), sum(_col4), count(_col4), min(_col2), sum(_col10), sum(_col9), count(_col2), sum(_col3), sum(_col12), sum(_col11), count(_col5), sum(_col14), sum(_col13), count(_col6) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out index 333cbd30c6..6a3c158990 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_casts.q.out @@ -170,7 +170,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 
10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -180,7 +180,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColEqualLongScalar(col 13:bigint, val 0)(children: LongColModuloLongScalar(col 3:bigint, val 250) -> 13:bigint) predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(ctinyint) (type: boolean), UDFToBoolean(csmallint) (type: boolean), UDFToBoolean(cint) (type: boolean), UDFToBoolean(cbigint) (type: boolean), UDFToBoolean(cfloat) (type: boolean), UDFToBoolean(cdouble) (type: boolean), cboolean1 (type: boolean), ((cbigint * 0L) <> 0L) (type: boolean), UDFToBoolean(ctimestamp1) (type: boolean), UDFToBoolean(cstring1) (type: boolean), UDFToInteger(ctinyint) (type: int), UDFToInteger(csmallint) (type: int), cint (type: int), UDFToInteger(cbigint) (type: int), UDFToInteger(cfloat) (type: int), UDFToInteger(cdouble) (type: int), UDFToInteger(cboolean1) (type: int), UDFToInteger(ctimestamp1) (type: int), UDFToInteger(cstring1) (type: int), UDFToInteger(substr(cstring1, 1, 1)) (type: int), UDFToByte(cfloat) (type: tinyint), UDFToShort(cfloat) (type: smallint), UDFToLong(cfloat) (type: bigint), UDFToDouble(ctinyint) (type: double), UDFToDouble(csmallint) (type: double), UDFToDouble(cint) (type: double), UDFToDouble(cbigint) (type: double), UDFToDouble(cfloat) (type: double), cdouble (type: double), UDFToDouble(cboolean1) (type: double), UDFToDouble(ctimestamp1) (type: double), UDFToDouble(cstring1) (type: double), UDFToDouble(substr(cstring1, 1, 1)) (type: double), UDFToFloat(cint) (type: float), UDFToFloat(cdouble) (type: float), CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), 
CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), CAST( CAST( ctimestamp1 AS DATE) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp), CAST( ctinyint AS STRING) (type: string), CAST( csmallint AS STRING) (type: string), CAST( cint AS STRING) (type: string), CAST( cbigint AS STRING) (type: string), CAST( cfloat AS STRING) (type: string), CAST( cdouble AS STRING) (type: string), CAST( cboolean1 AS STRING) (type: string), CAST( (cbigint * 0L) AS STRING) (type: string), CAST( ctimestamp1 AS STRING) (type: string), cstring1 (type: string), CAST( CAST( cstring1 AS CHAR(10)) AS STRING) (type: string), CAST( CAST( cstring1 AS varchar(10)) AS STRING) (type: string), UDFToFloat(UDFToInteger(cfloat)) (type: float), UDFToDouble((cint * 2)) (type: double), CAST( sin(cfloat) AS STRING) (type: string), (UDFToDouble(UDFToFloat(cint)) + UDFToDouble(cboolean1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62 @@ -189,13 +189,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [14, 15, 16, 17, 18, 19, 10, 21, 22, 23, 0, 1, 2, 3, 24, 25, 10, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 4, 5, 37, 38, 39, 41, 42, 5, 44, 46, 48, 50, 51, 52, 54, 58, 60, 8, 61, 63, 64, 65, 66, 67, 68, 69, 70, 72, 73, 6, 74, 75, 77, 79, 81, 84] selectExpressions: 
CastLongToBooleanViaLongToLong(col 0:tinyint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 15:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 16:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 18:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 19:boolean, LongColNotEqualLongScalar(col 20:bigint, val 0)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 20:bigint) -> 21:boolean, CastTimestampToBoolean(col 8:timestamp) -> 22:boolean, CastStringToBoolean(col 6) -> 23:boolean, CastDoubleToLong(col 4:float) -> 24:int, CastDoubleToLong(col 5:double) -> 25:int, CastTimestampToLong(col 8:timestamp) -> 26:int, CastStringToLong(col 6:string) -> 27:int, CastStringToLong(col 28:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 28:string) -> 29:int, CastDoubleToLong(col 4:float) -> 30:tinyint, CastDoubleToLong(col 4:float) -> 31:smallint, CastDoubleToLong(col 4:float) -> 32:bigint, CastLongToDouble(col 0:tinyint) -> 33:double, CastLongToDouble(col 1:smallint) -> 34:double, CastLongToDouble(col 2:int) -> 35:double, CastLongToDouble(col 3:bigint) -> 36:double, CastLongToDouble(col 10:boolean) -> 37:double, CastTimestampToDouble(col 8:timestamp) -> 38:double, CastStringToDouble(col 6:string) -> 39:double, CastStringToDouble(col 40:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 40:string) -> 41:double, CastLongToFloatViaLongToDouble(col 2:int) -> 42:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 44:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 46:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 48:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 50:timestamp, CastDoubleToTimestamp(col 4:float) -> 51:timestamp, CastDoubleToTimestamp(col 5:double) -> 52:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 54:timestamp, CastMillisecondsLongToTimestamp(col 
57:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 57:bigint) -> 58:timestamp, CastDateToTimestamp(col 59:date)(children: CastTimestampToDate(col 8:timestamp) -> 59:date) -> 60:timestamp, CastStringToTimestamp(col 6:string) -> 61:timestamp, CastStringToTimestamp(col 62:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 62:string) -> 63:timestamp, CastLongToString(col 0:tinyint) -> 64:string, CastLongToString(col 1:smallint) -> 65:string, CastLongToString(col 2:int) -> 66:string, CastLongToString(col 3:bigint) -> 67:string, CastFloatToString(col 4:float) -> 68:string, CastDoubleToString(col 5:double) -> 69:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 70:string, CastLongToString(col 71:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 71:bigint) -> 72:string, CastTimestampToString(col 8:timestamp) -> 73:string, CastStringGroupToChar(col 6:string, maxLength 10) -> 74:char(10), CastStringGroupToVarChar(col 6:string, maxLength 10) -> 75:varchar(10), CastLongToFloatViaLongToDouble(col 76:int)(children: CastDoubleToLong(col 4:float) -> 76:int) -> 77:float, CastLongToDouble(col 78:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 78:int) -> 79:double, CastDoubleToString(col 80:double)(children: FuncSinDoubleToDouble(col 4:float) -> 80:double) -> 81:string, DoubleColAddDoubleColumn(col 82:double, col 83:double)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 82:float, CastLongToDouble(col 10:boolean) -> 83:double) -> 84:double - Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 16014092 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 16014092 Basic stats: 
COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out index d455e35de1..ea8a7faabc 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_funcs.q.out @@ -263,7 +263,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 4276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 3956 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -274,7 +274,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, 
col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -282,7 +282,7 @@ STAGE PLANS: className: VectorReduceSinkObjectHashOperator native: true nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, hive.execution.engine tez IN [tez, spark] IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized, llap LLAP IO: all inputs @@ -311,13 +311,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -675,7 +675,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 7617 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 7497 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1010,7 +1010,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1020,7 +1020,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: @@ -1142,7 +1142,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1152,7 +1152,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: @@ -1291,7 +1291,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 
1960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1302,7 +1302,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 5, 8] selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out index e13d4a90b9..0bc73870a9 100644 --- a/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out +++ b/ql/src/test/results/clientpositive/llap/vectorized_timestamp_ints_casts.q.out @@ -56,7 +56,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -65,7 +65,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColEqualLongScalar(col 13:bigint, val 0)(children: LongColModuloLongScalar(col 3:bigint, val 250) -> 13:bigint) predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: 
timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -74,13 +74,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [15, 17, 19, 21, 22, 23, 25, 29, 8, 30, 32] selectExpressions: CastMillisecondsLongToTimestamp(col 0:tinyint) -> 15:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 17:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 19:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 21:timestamp, CastDoubleToTimestamp(col 4:float) -> 22:timestamp, CastDoubleToTimestamp(col 5:double) -> 23:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 25:timestamp, CastMillisecondsLongToTimestamp(col 28:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 28:bigint) -> 29:timestamp, CastStringToTimestamp(col 6:string) -> 30:timestamp, CastStringToTimestamp(col 31:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 31:string) -> 32:timestamp - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -225,7 +225,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -234,7 +234,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColEqualLongScalar(col 13:bigint, val 0)(children: LongColModuloLongScalar(col 3:bigint, val 250) -> 13:bigint) predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -243,13 +243,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [14, 15, 16, 17, 18, 19, 20, 22, 8, 23, 25] selectExpressions: CastLongToTimestamp(col 0:tinyint) -> 14:timestamp, CastLongToTimestamp(col 1:smallint) -> 15:timestamp, CastLongToTimestamp(col 2:int) -> 16:timestamp, CastLongToTimestamp(col 3:bigint) -> 17:timestamp, CastDoubleToTimestamp(col 4:float) -> 18:timestamp, CastDoubleToTimestamp(col 5:double) -> 19:timestamp, CastLongToTimestamp(col 10:boolean) -> 20:timestamp, 
CastLongToTimestamp(col 21:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 21:bigint) -> 22:timestamp, CastStringToTimestamp(col 6:string) -> 23:timestamp, CastStringToTimestamp(col 24:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 24:string) -> 25:timestamp - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/orc_merge5.q.out b/ql/src/test/results/clientpositive/orc_merge5.q.out index df5651e284..da18e7c739 100644 --- a/ql/src/test/results/clientpositive/orc_merge5.q.out +++ b/ql/src/test/results/clientpositive/orc_merge5.q.out @@ -67,20 +67,20 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator 
aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -188,20 +188,20 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 
2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/orc_merge6.q.out b/ql/src/test/results/clientpositive/orc_merge6.q.out index bc05d2fadf..4a8f97589b 100644 --- a/ql/src/test/results/clientpositive/orc_merge6.q.out +++ b/ql/src/test/results/clientpositive/orc_merge6.q.out @@ -74,7 +74,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) @@ -83,7 +83,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -248,7 +248,7 @@ STAGE PLANS: sort order: ++ Map-reduce partition columns: _col0 (type: string), _col1 (type: 
int) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) + value expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) @@ -257,7 +257,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) + expressions: _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col6 (type: struct), _col0 (type: string), _col1 (type: int) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out index 538e2a0241..bd59c4aa4e 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat1.q.out @@ -67,20 +67,20 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE Reduce Output Operator sort order: - Statistics: Num rows: 1 Data size: 2696 Basic stats: COMPLETE Column stats: NONE - value expressions: _col0 (type: struct), _col1 
(type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) + Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + value expressions: _col0 (type: struct), _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct) Reduce Operator Tree: Group By Operator aggregations: compute_stats(VALUE._col0), compute_stats(VALUE._col1), compute_stats(VALUE._col2), compute_stats(VALUE._col3), compute_stats(VALUE._col4) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4 - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 2760 Basic stats: COMPLETE Column stats: NONE + Statistics: Num rows: 1 Data size: 2824 Basic stats: COMPLETE Column stats: NONE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out index b3cbf4a65f..58e2654aa8 100644 --- a/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out +++ b/ql/src/test/results/clientpositive/orc_merge_incompat2.q.out @@ -62,7 +62,7 @@ STAGE PLANS: sort order: + Map-reduce partition columns: _col0 (type: double) Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE - value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) + value expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct) File Output Operator compressed: false table: @@ -77,7 +77,7 @@ STAGE PLANS: outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic 
stats: COMPLETE Column stats: NONE Select Operator - expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) + expressions: _col1 (type: struct), _col2 (type: struct), _col3 (type: struct), _col4 (type: struct), _col5 (type: struct), _col0 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 Statistics: Num rows: 1 Data size: 352 Basic stats: COMPLETE Column stats: NONE File Output Operator diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out index 7d82465506..158157a6c0 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_0.q.out @@ -29912,7 +29912,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cint = 49) and (cfloat = 3.5)) or ((cint = 47) and (cfloat = 2.09)) or ((cint = 45) and (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false @@ -30033,7 +30033,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (struct(cint,cfloat)) IN (const struct(49,3.5), const struct(47,2.09), const struct(45,3.02)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false @@ -30153,7 +30153,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cint = 49) or (cfloat = 3.5)) and ((cint = 47) or (cfloat = 2.09)) and ((cint = 45) or (cfloat = 3.02))) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column 
stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE GatherStats: false Filter Operator isSamplingPred: false diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out index 0cc7e67363..776f7c8c31 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_10.q.out @@ -65,7 +65,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -74,7 +74,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(6,2), val -5638.15)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(6,2))), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(11,4)), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 
9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -83,13 +83,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) 
-> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2434654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2434654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out index 97f5ede98a..5603015358 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_11.q.out @@ -47,7 +47,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2381474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -56,7 +56,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 
6:string)) predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1190792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -65,13 +65,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 10, 5, 8, 13, 14, 15, 17, 18] selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1:int)(children: col 1:smallint) -> 13:int, DoubleColSubtractDoubleScalar(col 5:double, val 9763215.5639) -> 14:double, DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleScalar(col 16:double, val 6981.0)(children: DoubleColUnaryMinus(col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleScalar(col 5:double, val -5638.15) -> 18:double - Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1116736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1116736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out 
b/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out index 6059ac98fd..b60816e24c 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_12.q.out @@ -82,7 +82,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 12288 Data size: 1647554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -91,7 +91,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -100,7 +100,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 17] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double - Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: @@ -115,7 +115,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -125,7 +125,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized Map 
Vectorization: @@ -147,11 +147,11 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), (-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -173,7 +173,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: 
hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603604 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double) Execution mode: vectorized Map Vectorization: @@ -193,10 +193,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: string), null (type: timestamp), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(22,2)), VALUE._col11 (type: bigint), VALUE._col12 (type: double), VALUE._col8 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603644 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603644 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out index 134ce1f12e..90462a6de9 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_13.q.out @@ -84,7 +84,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1779902 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -93,7 +93,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4)))) predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -102,7 +102,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -117,7 +117,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -127,7 +127,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS 
true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -149,11 +149,11 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 346 Data size: 64822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) 
outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -175,7 +175,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -195,13 +195,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num 
rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -407,7 +407,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1779902 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -416,7 +416,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 
15:decimal(11,4)))) predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -425,7 +425,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -440,7 +440,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -450,7 +450,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -472,11 +472,11 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 346 Data size: 64822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), 
(- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -498,7 +498,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -518,13 +518,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: 
decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out index c943d7b07b..26948e3d81 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_14.q.out @@ -84,7 +84,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1889990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -93,7 +93,7 @@ STAGE 
PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -257), FilterDoubleColLessDoubleColumn(col 4:float, col 14:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float)), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 15:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 15:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp))) predicate: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean) - Statistics: Num rows: 758 Data size: 132082 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 758 Data size: 116802 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -102,7 +102,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 4, 6, 10, 5, 17, 22, 4, 23] selectExpressions: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 18:double) -> 19:double, 
DoubleColUnaryMinus(col 20:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 20:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 23:double - Statistics: Num rows: 758 Data size: 132082 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 758 Data size: 116802 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: @@ -117,7 +117,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 379 Data size: 66108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -127,7 +127,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 379 Data size: 66108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -149,11 +149,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: 
mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 189 Data size: 33008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -175,7 +175,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS 
true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Execution mode: vectorized Map Vectorization: @@ -195,10 +195,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 189 Data size: 45860 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out index f2d32b35f1..92d5961735 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_15.q.out @@ -80,7 +80,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -89,7 +89,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0))) predicate: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), 
ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -98,7 +98,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 18, 21] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: @@ -113,7 +113,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: 
boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -123,7 +123,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -144,11 +144,11 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 3072 Data size: 639332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 
* _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -170,7 +170,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: int), _col19 (type: decimal(13,2)), _col20 (type: double) Execution mode: vectorized Map Vectorization: @@ -189,10 +189,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), 
KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out index ae684e6cae..a3981ebf90 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_16.q.out @@ -57,7 +57,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2183514 Basic stats: 
COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -66,7 +66,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -75,7 +75,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -90,7 +90,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -100,7 +100,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, 
LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -122,14 +122,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1536 Data size: 227586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE 
File Output Operator compressed: false - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out index 6adb1289d7..5fed7fcd2b 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_17.q.out @@ -65,7 +65,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean) - Statistics: Num rows: 12288 Data size: 1647550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1522990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -74,7 +74,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float)), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)))) predicate: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 
988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean) - Statistics: Num rows: 6141 Data size: 823456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 761216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -83,7 +83,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 17, 20, 22, 24, 26, 29] selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 16:bigint, DoubleColUnaryMinus(col 5:double) -> 17:double, DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 18:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 18:double) -> 19:double) -> 20:double, DoubleColDivideDoubleColumn(col 5:double, col 21:double)(children: CastLongToDouble(col 2:int) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColUnaryMinus(col 5:double) -> 23:double) -> 24:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 25:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 25:decimal(19,0)) -> 26:decimal(11,4), 
DoubleScalarAddDoubleColumn(val 2563.58, col 28:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 5:double) -> 27:double) -> 28:double) -> 29:double - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -92,7 +92,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization: @@ -112,10 +112,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out index 1b2800f55b..8ac5aafc03 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_2.q.out @@ -63,7 +63,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2157324 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1908244 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -72,7 +72,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 14:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterLongColLessLongScalar(col 2:int, val 359))), FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375))) 
predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 636272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -81,7 +81,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 4, 3, 0, 5, 15, 18] selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double - Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 636272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out index 4d3f0b6603..1c87b710e8 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_3.q.out @@ -68,7 +68,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS 
decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 12288 Data size: 1276620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1027540 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -77,7 +77,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float), FilterDecimalColNotEqualDecimalScalar(col 14:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 15:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 15:double)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 16:double), FilterDecimalColGreaterEqualDecimalScalar(col 17:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 17:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) 
(type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -86,7 +86,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 4, 2, 18, 21, 22, 25, 4, 26, 27, 30] selectExpressions: CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 1:smallint) -> 19:double, CastLongToDouble(col 1:smallint) -> 20:double) -> 21:double, CastLongToDouble(col 0:tinyint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double, CastLongToDouble(col 0:tinyint) -> 24:double) -> 25:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 26:double, CastLongToDouble(col 2:int) -> 27:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: CastLongToDouble(col 2:int) -> 28:double, CastLongToDouble(col 2:int) -> 29:double) -> 30:double - Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out index f100fff487..9e99c47048 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_5.q.out @@ -57,7 +57,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cstring1 like 
'%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2454862 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2330342 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -66,7 +66,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %b%), SelectColumnIsNotNull(col 11:boolean)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterStringColLikeStringScalar(col 7:string, pattern a), SelectColumnIsNotNull(col 9:timestamp))) predicate: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean) - Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6879 Data size: 1304690 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint @@ -74,7 +74,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6879 Data size: 1304690 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out index d1a1ae4065..3a8c788cb5 100644 --- 
a/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_7.q.out @@ -71,7 +71,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -80,7 +80,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0)) predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11033 Data size: 2711364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 2487724 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), 
ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -89,7 +89,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), 
_col14 (type: tinyint) sort order: +++++++++++++++ @@ -98,7 +98,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -118,13 +118,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -290,7 +290,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -299,7 +299,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0)) predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11033 Data size: 2711364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 2487724 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint 
(type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -308,7 +308,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 
(type: tinyint) sort order: +++++++++++++++ @@ -317,7 +317,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -337,13 +337,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out index b15b1452a7..afc9c8e159 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_8.q.out @@ -67,7 +67,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2733998 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -76,7 +76,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean))) predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 680930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: 
timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -85,7 +85,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 
(type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ @@ -94,7 +94,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -114,13 +114,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 3760 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -273,7 +273,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2733998 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -282,7 +282,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean))) predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 680930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: 
double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -291,7 +291,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), 
_col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ @@ -300,7 +300,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -320,13 +320,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: 
org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out b/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out index ae684e6cae..a3981ebf90 100644 --- a/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/parquet_vectorization_9.q.out @@ -57,7 +57,7 @@ STAGE PLANS: TableScan alias: alltypesparquet filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2183514 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -66,7 +66,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -75,7 +75,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Group 
By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -90,7 +90,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -100,7 +100,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -122,14 +122,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1536 Data size: 227586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) 
THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/partitioned_table_stats.q.out b/ql/src/test/results/clientpositive/partitioned_table_stats.q.out index 4376bedc01..0ce2d3e490 100644 --- a/ql/src/test/results/clientpositive/partitioned_table_stats.q.out +++ b/ql/src/test/results/clientpositive/partitioned_table_stats.q.out @@ -198,8 +198,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type timestamp -min 1325379723 -max 1325379723 +min 2012-01-01 01:02:03 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 1 avg_col_len @@ -485,8 +485,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: default@datatype_stats_n0 col_name ts data_type timestamp -min 1325379722 -max 1325379723 +min 2012-01-01 01:02:02 +max 2012-01-01 01:02:03 num_nulls 1 distinct_count 2 avg_col_len @@ -772,8 +772,8 @@ POSTHOOK: type: DESCTABLE POSTHOOK: Input: 
default@datatype_stats_n0 col_name ts data_type timestamp -min 1325379722 -max 1325379724 +min 2012-01-01 01:02:02 +max 2012-01-01 01:02:04 num_nulls 1 distinct_count 3 avg_col_len diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out index 2ef7b133f8..6ade6afc7b 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_3.q.out @@ -767,7 +767,7 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_9] - Group By Operator [GBY_7] (rows=1/1 width=2760) + Group By Operator [GBY_7] (rows=1/1 width=2824) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0, 'hll')","compute_stats(VALUE._col2, 'hll')","compute_stats(VALUE._col3, 'hll')","compute_stats(VALUE._col4, 'hll')","compute_stats(VALUE._col5, 'hll')"] <-Map 1 [CUSTOM_SIMPLE_EDGE] vectorized File Output Operator [FS_14] diff --git a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out index 16ecfaa9a6..4eb698ecd1 100644 --- a/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out +++ b/ql/src/test/results/clientpositive/tez/explainanalyze_4.q.out @@ -48,29 +48,29 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=4626/10 width=552) + Select Operator [SEL_11] (rows=4626/10 width=528) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_10] - Merge Join Operator [MERGEJOIN_27] (rows=4626/10 width=552) + Merge Join Operator [MERGEJOIN_27] (rows=4626/10 width=528) 
Conds:RS_6._col2=RS_7._col2(Inner),Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] PartitionCols:_col2 - Select Operator [SEL_2] (rows=3078/10 width=251) + Select Operator [SEL_2] (rows=3078/10 width=231) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_15] (rows=3078/10 width=251) + Filter Operator [FIL_15] (rows=3078/10 width=231) predicate:cint BETWEEN 1000000 AND 3000000 - TableScan [TS_0] (rows=12288/12288 width=251) + TableScan [TS_0] (rows=12288/12288 width=231) default@alltypesorc,a,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_7] PartitionCols:_col2 - Select Operator [SEL_5] (rows=2298/10 width=251) + Select Operator [SEL_5] (rows=2298/10 width=231) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"] - Filter Operator [FIL_16] (rows=2298/10 width=251) + Filter Operator [FIL_16] (rows=2298/10 width=231) predicate:(cint BETWEEN 1000000 AND 3000000 and cbigint is not null) - TableScan [TS_3] (rows=12288/12288 width=251) + TableScan [TS_3] (rows=12288/12288 width=231) default@alltypesorc,b,Tbl:COMPLETE,Col:COMPLETE,Output:["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"] PREHOOK: query: select diff --git a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out index fa85521be4..da57efe03e 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_3.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_3.q.out @@ 
-615,7 +615,7 @@ Stage-3 Stage-1 Reducer 2 File Output Operator [FS_10] - Group By Operator [GBY_8] (rows=1 width=2760) + Group By Operator [GBY_8] (rows=1 width=2824) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(VALUE._col0)","compute_stats(VALUE._col1)","compute_stats(VALUE._col2)","compute_stats(VALUE._col3)","compute_stats(VALUE._col4)"] <-Map 1 [CUSTOM_SIMPLE_EDGE] File Output Operator [FS_3] @@ -627,7 +627,7 @@ Stage-3 TableScan [TS_0] (rows=1 width=352) default@orc_merge5_n0,orc_merge5_n0,Tbl:COMPLETE,Col:NONE,Output:["userid","string1","subtype","decimal1","ts"] PARTITION_ONLY_SHUFFLE [RS_7] - Group By Operator [GBY_6] (rows=1 width=2696) + Group By Operator [GBY_6] (rows=1 width=2760) Output:["_col0","_col1","_col2","_col3","_col4"],aggregations:["compute_stats(userid, 'hll')","compute_stats(string1, 'hll')","compute_stats(subtype, 'hll')","compute_stats(decimal1, 'hll')","compute_stats(ts, 'hll')"] Select Operator [SEL_5] (rows=1 width=352) Output:["userid","string1","subtype","decimal1","ts"] diff --git a/ql/src/test/results/clientpositive/timestamp_comparison3.q.out b/ql/src/test/results/clientpositive/timestamp_comparison3.q.out new file mode 100644 index 0000000000..3977be77f7 --- /dev/null +++ b/ql/src/test/results/clientpositive/timestamp_comparison3.q.out @@ -0,0 +1,212 @@ +PREHOOK: query: create database timestamp_test_n123 +PREHOOK: type: CREATEDATABASE +PREHOOK: Output: database:timestamp_test_n123 +POSTHOOK: query: create database timestamp_test_n123 +POSTHOOK: type: CREATEDATABASE +POSTHOOK: Output: database:timestamp_test_n123 +PREHOOK: query: create table timestamp_test_n123.onecolumntable (ts timestamp) +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:timestamp_test_n123 +PREHOOK: Output: timestamp_test_n123@onecolumntable +POSTHOOK: query: create table timestamp_test_n123.onecolumntable (ts timestamp) +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:timestamp_test_n123 +POSTHOOK: Output: 
timestamp_test_n123@onecolumntable +PREHOOK: query: insert into timestamp_test_n123.onecolumntable values +('2015-01-01 00:00:00'), +('2015-01-02 00:00:00'), +('2015-01-03 00:00:00'), +('2015-01-04 00:00:00'), +('2015-01-05 00:00:00') +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: timestamp_test_n123@onecolumntable +POSTHOOK: query: insert into timestamp_test_n123.onecolumntable values +('2015-01-01 00:00:00'), +('2015-01-02 00:00:00'), +('2015-01-03 00:00:00'), +('2015-01-04 00:00:00'), +('2015-01-05 00:00:00') +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: timestamp_test_n123@onecolumntable +POSTHOOK: Lineage: onecolumntable.ts SCRIPT [] +PREHOOK: query: describe formatted timestamp_test_n123.onecolumntable ts +PREHOOK: type: DESCTABLE +PREHOOK: Input: timestamp_test_n123@onecolumntable +POSTHOOK: query: describe formatted timestamp_test_n123.onecolumntable ts +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: timestamp_test_n123@onecolumntable +col_name ts +data_type timestamp +min 2015-01-01 00:00:00 +max 2015-01-05 00:00:00 +num_nulls 0 +distinct_count 5 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"ts\":\"true\"}} +PREHOOK: query: explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-02 00:00:00' as timestamp) + and ts <= cast('2015-01-04 00:00:00' as timestamp) +PREHOOK: type: QUERY +PREHOOK: Input: timestamp_test_n123@onecolumntable +#### A masked pattern was here #### +POSTHOOK: query: explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-02 00:00:00' as timestamp) + and ts <= cast('2015-01-04 00:00:00' as timestamp) +POSTHOOK: type: QUERY +POSTHOOK: Input: timestamp_test_n123@onecolumntable +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: 
Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: onecolumntable + filterExpr: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-04 00:00:00' (type: boolean) + Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-04 00:00:00' (type: boolean) + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 3 Data size: 120 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-02 00:00:00' as timestamp) + and ts <= cast('2015-01-03 00:00:00' as timestamp) +PREHOOK: type: QUERY +PREHOOK: Input: timestamp_test_n123@onecolumntable +#### A masked pattern was here #### +POSTHOOK: query: explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-02 00:00:00' as timestamp) + and ts <= cast('2015-01-03 00:00:00' as timestamp) +POSTHOOK: type: QUERY +POSTHOOK: Input: timestamp_test_n123@onecolumntable +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: onecolumntable + filterExpr: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-03 00:00:00' 
(type: boolean) + Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ts BETWEEN TIMESTAMP'2015-01-02 00:00:00' AND TIMESTAMP'2015-01-03 00:00:00' (type: boolean) + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 2 Data size: 80 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-01 00:00:00' as timestamp) + and ts <= cast('2015-01-08 00:00:00' as timestamp) +PREHOOK: type: QUERY +PREHOOK: Input: timestamp_test_n123@onecolumntable +#### A masked pattern was here #### +POSTHOOK: query: explain +select ts from timestamp_test_n123.onecolumntable +where ts >= cast('2015-01-01 00:00:00' as timestamp) + and ts <= cast('2015-01-08 00:00:00' as timestamp) +POSTHOOK: type: QUERY +POSTHOOK: Input: timestamp_test_n123@onecolumntable +#### A masked pattern was here #### +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: onecolumntable + filterExpr: ts BETWEEN TIMESTAMP'2015-01-01 00:00:00' AND TIMESTAMP'2015-01-08 00:00:00' (type: boolean) + Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Filter Operator + predicate: ts BETWEEN TIMESTAMP'2015-01-01 00:00:00' AND TIMESTAMP'2015-01-08 
00:00:00' (type: boolean) + Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + Select Operator + expressions: ts (type: timestamp) + outputColumnNames: _col0 + Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + File Output Operator + compressed: false + Statistics: Num rows: 5 Data size: 200 Basic stats: COMPLETE Column stats: COMPLETE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: drop table timestamp_test_n123.onecolumntable +PREHOOK: type: DROPTABLE +PREHOOK: Input: timestamp_test_n123@onecolumntable +PREHOOK: Output: timestamp_test_n123@onecolumntable +POSTHOOK: query: drop table timestamp_test_n123.onecolumntable +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: timestamp_test_n123@onecolumntable +POSTHOOK: Output: timestamp_test_n123@onecolumntable +PREHOOK: query: drop database timestamp_test_n123 +PREHOOK: type: DROPDATABASE +PREHOOK: Input: database:timestamp_test_n123 +PREHOOK: Output: database:timestamp_test_n123 +POSTHOOK: query: drop database timestamp_test_n123 +POSTHOOK: type: DROPDATABASE +POSTHOOK: Input: database:timestamp_test_n123 +POSTHOOK: Output: database:timestamp_test_n123 diff --git a/ql/src/test/results/clientpositive/timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/timestamp_ints_casts.q.out index f687308d9e..572c49ea72 100644 --- a/ql/src/test/results/clientpositive/timestamp_ints_casts.q.out +++ b/ql/src/test/results/clientpositive/timestamp_ints_casts.q.out @@ -49,17 +49,17 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: 
Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -187,17 +187,17 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vector_aggregate_9.q.out b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out index 0aa46a26c7..0c823afc4e 100644 --- a/ql/src/test/results/clientpositive/vector_aggregate_9.q.out +++ b/ql/src/test/results/clientpositive/vector_aggregate_9.q.out @@ -347,7 +347,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: vectortab2korc_n4 - Statistics: Num rows: 2000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 75760 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:t:tinyint, 1:si:smallint, 2:i:int, 3:b:bigint, 4:f:float, 
5:d:double, 6:dc:decimal(38,18), 7:bo:boolean, 8:s:string, 9:s2:string, 10:ts:timestamp, 11:ts2:timestamp, 12:dt:date, 13:ROW__ID:struct] @@ -358,7 +358,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [10] - Statistics: Num rows: 2000 Data size: 80000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2000 Data size: 75760 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ts), max(ts), sum(ts), count(ts) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/vector_coalesce.q.out b/ql/src/test/results/clientpositive/vector_coalesce.q.out index 1ab2d6ebb1..dd7df44d7b 100644 --- a/ql/src/test/results/clientpositive/vector_coalesce.q.out +++ b/ql/src/test/results/clientpositive/vector_coalesce.q.out @@ -294,7 +294,7 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 8:timestamp), SelectColumnIsNotNull(col 9:timestamp)) + predicateExpression: FilterExprOrExpr(children: SelectColumnIsNotNull(col 9:timestamp), SelectColumnIsNotNull(col 8:timestamp)) Select Vectorization: className: VectorSelectOperator native: true diff --git a/ql/src/test/results/clientpositive/vector_decimal_cast.q.out b/ql/src/test/results/clientpositive/vector_decimal_cast.q.out index 8889a7474d..2180e629a8 100644 --- a/ql/src/test/results/clientpositive/vector_decimal_cast.q.out +++ b/ql/src/test/results/clientpositive/vector_decimal_cast.q.out @@ -20,8 +20,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - filterExpr: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 638316 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 
12288 Data size: 513756 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -29,9 +29,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 5:double), SelectColumnIsNotNull(col 8:timestamp)) - predicate: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 8:timestamp), SelectColumnIsNotNull(col 5:double)) + predicate: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 3816 Data size: 159600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -40,19 +40,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 2, 10, 8, 13, 14, 15, 16] selectExpressions: CastDoubleToDecimal(col 5:double) -> 13:decimal(20,10), CastLongToDecimal(col 2:int) -> 14:decimal(23,14), 
CastLongToDecimal(col 10:boolean) -> 15:decimal(5,2), CastTimestampToDecimal(col 8:timestamp) -> 16:decimal(15,0) - Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3816 Data size: 1760976 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -144,8 +144,8 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypes_small - filterExpr: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 12288 Data size: 638316 Basic stats: COMPLETE Column stats: COMPLETE + filterExpr: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 12288 Data size: 513756 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -153,9 +153,9 @@ STAGE PLANS: Filter Vectorization: className: VectorFilterOperator native: true - predicateExpression: FilterExprAndExpr(children: 
SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 5:double), SelectColumnIsNotNull(col 8:timestamp)) - predicate: (cboolean1 is not null and cint is not null and cdouble is not null and ctimestamp1 is not null) (type: boolean) - Statistics: Num rows: 5112 Data size: 265564 Basic stats: COMPLETE Column stats: COMPLETE + predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 10:boolean), SelectColumnIsNotNull(col 2:int), SelectColumnIsNotNull(col 8:timestamp), SelectColumnIsNotNull(col 5:double)) + predicate: (cboolean1 is not null and cint is not null and ctimestamp1 is not null and cdouble is not null) (type: boolean) + Statistics: Num rows: 3816 Data size: 159600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), cint (type: int), cboolean1 (type: boolean), ctimestamp1 (type: timestamp), CAST( cdouble AS decimal(20,10)) (type: decimal(20,10)), CAST( cint AS decimal(23,14)) (type: decimal(23,14)), CAST( cboolean1 AS decimal(5,2)) (type: decimal(5,2)), CAST( ctimestamp1 AS decimal(15,0)) (type: decimal(15,0)) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7 @@ -164,19 +164,19 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 2, 10, 8, 13, 14, 15, 16] selectExpressions: CastDoubleToDecimal(col 5:double) -> 13:decimal(20,10), CastLongToDecimal(col 2:int) -> 14:decimal(23,14), CastLongToDecimal(col 10:boolean) -> 15:decimal(5,2), CastTimestampToDecimal(col 8:timestamp) -> 16:decimal(15,0) - Statistics: Num rows: 5112 Data size: 2410700 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3816 Data size: 1760976 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 10 Limit Vectorization: className: VectorLimitOperator native: true - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column 
stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 10 Data size: 4784 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 4704 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vector_empty_where.q.out b/ql/src/test/results/clientpositive/vector_empty_where.q.out index 713357f89d..61f6d8005e 100644 --- a/ql/src/test/results/clientpositive/vector_empty_where.q.out +++ b/ql/src/test/results/clientpositive/vector_empty_where.q.out @@ -483,7 +483,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 528216 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 403656 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -492,7 +492,7 @@ STAGE PLANS: native: true predicateExpression: SelectColumnIsTrue(col 13:boolean)(children: CastTimestampToBoolean(col 8:timestamp) -> 13:boolean) predicate: ctimestamp1 (type: timestamp) - Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 201828 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cint (type: int) outputColumnNames: cint @@ -500,7 +500,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [2] - Statistics: Num rows: 6144 Data size: 264108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 201828 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator Group By Vectorization: className: VectorGroupByOperator diff --git a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out 
b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out index c5584e7646..e382b73b90 100644 --- a/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out +++ b/ql/src/test/results/clientpositive/vector_interval_mapjoin.q.out @@ -207,14 +207,14 @@ STAGE PLANS: TableScan alias: vectortab_b_1korc filterExpr: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 186864 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 184664 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 943 Data size: 176202 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 164340 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 - Statistics: Num rows: 943 Data size: 99958 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 890 Data size: 94340 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col0 (type: string), _col1 (type: interval_day_time) @@ -226,7 +226,7 @@ STAGE PLANS: TableScan alias: vectortab_a_1korc filterExpr: (dt is not null and CAST( ts AS DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 1000 Data size: 187480 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1000 Data size: 185480 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -235,7 +235,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: SelectColumnIsNotNull(col 12:date), SelectColumnIsNotNull(col 14:date)(children: CastTimestampToDate(col 10:timestamp) -> 14:date), SelectColumnIsNotNull(col 8:string)) predicate: (dt is not null and CAST( ts AS 
DATE) is not null and s is not null) (type: boolean) - Statistics: Num rows: 954 Data size: 178852 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 905 Data size: 167854 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: s (type: string), (dt - CAST( ts AS DATE)) (type: interval_day_time) outputColumnNames: _col0, _col1 @@ -244,7 +244,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 16] selectExpressions: DateColSubtractDateColumn(col 12:date, col 15:date)(children: CastTimestampToDate(col 10:timestamp) -> 15:date) -> 16:interval_day_time - Statistics: Num rows: 954 Data size: 101124 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 905 Data size: 95930 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Inner Join 0 to 1 @@ -259,7 +259,7 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2 - Statistics: Num rows: 34600 Data size: 6920000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32218 Data size: 6443600 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col2 (type: string), _col1 (type: interval_day_time) outputColumnNames: _col0, _col1, _col2 @@ -267,13 +267,13 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 2, 1] - Statistics: Num rows: 34600 Data size: 6920000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32218 Data size: 6443600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num 
rows: 34600 Data size: 6920000 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 32218 Data size: 6443600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out b/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out index d99cf4db00..58fa5b3b70 100644 --- a/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out +++ b/ql/src/test/results/clientpositive/vector_non_constant_in_expr.q.out @@ -21,17 +21,17 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cint = UDFToInteger(ctinyint)) or (UDFToLong(cint) = cbigint)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: ((cint = UDFToInteger(ctinyint)) or (UDFToLong(cint) = cbigint)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 12288 Data size: 3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 12288 Data size: 
3093170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2844090 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vector_outer_join1.q.out b/ql/src/test/results/clientpositive/vector_outer_join1.q.out index b48ff791a7..aa6bffb988 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join1.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join1.q.out @@ -253,14 +253,14 @@ STAGE PLANS: TableScan alias: cd filterExpr: cint is not null (type: boolean) - Statistics: Num rows: 15 Data size: 3745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15 Data size: 3545 Basic stats: COMPLETE Column stats: COMPLETE Filter Operator predicate: cint is not null (type: boolean) - Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2520 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 10 Data size: 2640 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 10 Data size: 2520 Basic stats: COMPLETE Column stats: COMPLETE HashTable Sink Operator keys: 0 _col2 (type: int) @@ -271,7 +271,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: c - Statistics: Num rows: 15 Data size: 3745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15 Data size: 3545 Basic stats: COMPLETE 
Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -282,7 +282,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11] - Statistics: Num rows: 15 Data size: 3745 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 15 Data size: 3545 Basic stats: COMPLETE Column stats: COMPLETE Map Join Operator condition map: Left Outer Join 0 to 1 @@ -297,13 +297,13 @@ STAGE PLANS: nativeConditionsMet: hive.mapjoin.optimized.hashtable IS true, hive.vectorized.execution.mapjoin.native.enabled IS true, One MapJoin Condition IS true, No nullsafe IS true, Small table vectorizes IS true, Outer Join has keys IS true, Optimized Table and Supports Key Types IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23 - Statistics: Num rows: 33 Data size: 14459 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 33 Data size: 14099 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 33 Data size: 14459 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 33 Data size: 14099 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vector_outer_join3.q.out 
b/ql/src/test/results/clientpositive/vector_outer_join3.q.out index 0640aeec75..eab98e06be 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join3.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join3.q.out @@ -248,7 +248,7 @@ left outer join small_alltypesorc_a_n1 hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 10.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n 
{\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\",\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": 
\"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n 
},\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"rowCount\": 10.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ],\n \"rowCount\": 10.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 25.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 6.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": 
\"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": 
\"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ]\n },\n \"rowCount\": 10.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 10.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 31.25\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cint` IS NOT NULL) AS `t1` ON `t`.`cint` = `t1`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cstring1` IS NOT NULL) AS `t3` ON `t`.`cstring1` = `t3`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch 
Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","filterExpr:":"cint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cint is not null (type: boolean)","Statistics:":"Num rows: 10 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 10 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","filterExpr:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 
1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 45 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 
0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 101 Data size: 808 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS 
true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 10.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n 
\"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n 
\"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\",\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": 
\"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"rowCount\": 10.0\n },\n {\n \"id\": \"4\",\n \"relOp\": 
\"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ],\n \"rowCount\": 10.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 25.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 6.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": 
true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ]\n },\n \"rowCount\": 10.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 10.0\n },\n {\n \"id\": \"9\",\n \"relOp\": 
\"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 31.25\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cint` IS NOT NULL) AS `t1` ON `t`.`cint` = `t1`.`cint`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cstring1` IS NOT NULL) AS `t3` ON `t`.`cstring1` = `t3`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","filterExpr:":"cint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 44 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cint is not null (type: boolean)","Statistics:":"Num rows: 10 Data size: 24 
Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 10 Data size: 24 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","filterExpr:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cstring1"],"database:":"default","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select 
Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 6]"},"Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col1":"0:_col1"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col1"],"Statistics:":"Num rows: 45 Data size: 3240 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 101 Data size: 808 Basic stats: COMPLETE Column stats: 
COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 6]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce 
Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -292,7 +292,7 @@ left outer join small_alltypesorc_a_n1 hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 14.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n 
\"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": 
\"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring1\",\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 8.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": 
\"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ]\n },\n \"rowCount\": 15.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 15.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 20.0\n 
},\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 6.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": 
-64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ]\n },\n \"rowCount\": 10.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 10.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 25.0\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT 
COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cstring2` IS NOT NULL) AS `t1` ON `t`.`cstring2` = `t1`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cstring1` IS NOT NULL) AS `t3` ON `t`.`cstring1` = `t3`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","filterExpr:":"cstring2 is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cstring2 is not null (type: boolean)","Statistics:":"Num rows: 15 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 15 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","filterExpr:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE 
Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 
7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 23 Data size: 1260 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 51 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink 
Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input 
format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 14.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n 
\"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring1\",\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 8.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": 
\"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n 
\"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ]\n },\n \"rowCount\": 15.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 15.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 6.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n 
\"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n 
\"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ]\n },\n \"rowCount\": 10.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 10.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 25.0\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cstring2` IS NOT NULL) AS `t1` ON `t`.`cstring2` = `t1`.`cstring2`\nLEFT JOIN (SELECT `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cstring1` IS NOT NULL) AS `t3` ON `t`.`cstring1` = `t3`.`cstring1`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT 
STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cstring2"],"database:":"default","filterExpr:":"cstring2 is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 1488 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cstring2 is not null (type: boolean)","Statistics:":"Num rows: 15 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cstring2 (type: string)","columnExprMap:":{"_col0":"cstring2"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 15 Data size: 1116 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cstring1"],"database:":"default","filterExpr:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 990 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"cstring1 is not null (type: boolean)","Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"cstring1 (type: string)","columnExprMap:":{"_col0":"cstring1"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 10 Data size: 540 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: 
string)","1":"_col0 (type: string)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cstring1","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[6, 7]"},"Statistics:":"Num rows: 20 Data size: 2478 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 7:string"],"bigTableValueExpressions:":["col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 23 Data size: 1260 Basic stats: COMPLETE Column stats: 
COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: string)","1":"_col0 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 51 Data size: 408 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map 
Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd @@ -336,7 +336,7 @@ left outer join small_alltypesorc_a_n1 hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_a_n1 #### A masked pattern was here 
#### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 26.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n 
\"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\",\n \"cbigint\",\n \"cstring1\",\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 16.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": 
\"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n 
{\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n }\n ]\n },\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ]\n }\n ]\n },\n \"rowCount\": 11.25\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cbigint\",\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 11.25\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 5,\n \"name\": \"$5\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n }\n ]\n },\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 4,\n \"name\": \"$4\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 10.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n 
\"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n 
},\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ]\n }\n ]\n },\n \"rowCount\": 5.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\",\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 5.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cbigint` IS NOT NULL AND `cstring2` IS NOT NULL) AS `t1` ON `t`.`cstring2` = `t1`.`cstring2` AND `t`.`cbigint` = 
`t1`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cint` IS NOT NULL AND `cstring1` IS NOT NULL) AS `t3` ON `t`.`cstring1` = `t3`.`cstring1` AND `t`.`cint` = `t3`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","filterExpr:":"(cbigint is not null and cstring2 is not null) (type: boolean)","Statistics:":"Num rows: 20 Data size: 1616 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"(cbigint is not null and cstring2 is not null) (type: boolean)","Statistics:":"Num rows: 11 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 11 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","filterExpr:":"(cint is not null and cstring1 is not null) (type: boolean)","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: 
COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"(cint is not null and cstring1 is not null) (type: boolean)","Statistics:":"Num rows: 5 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 5 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join 
Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 26 Data size: 1598 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> 
bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator 
Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 26.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": 
true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\",\n \"cbigint\",\n \"cstring1\",\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": 
\"$7\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 16.75,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": 
-1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n }\n ]\n },\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ]\n }\n ]\n },\n \"rowCount\": 11.25\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cbigint\",\n \"cstring2\"\n ],\n \"exprs\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n }\n ],\n \"rowCount\": 11.25\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 5,\n \"name\": \"$5\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n }\n ]\n },\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 4,\n \"name\": \"$4\"\n },\n {\n 
\"input\": 1,\n \"name\": \"$1\"\n }\n ]\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_a_n1\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 20.0,\n \"avgRowSize\": 10.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": 
true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 8,\n \"minValue\": -738306196,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 8\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 2,\n \"minValue\": -64,\n \"maxValue\": -51\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 6,\n \"minValue\": -15920,\n \"maxValue\": -6907\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 15,\n \"minValue\": -1970551565,\n \"maxValue\": 1086455747\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 2,\n \"minValue\": -64.0,\n \"maxValue\": -51.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 6,\n \"minValue\": -15920.0,\n \"maxValue\": -6907.0\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 15\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28810,\n \"maxValue\": -28789\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28812,\n \"maxValue\": -28786\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ]\n }\n ]\n },\n \"rowCount\": 5.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\",\n \"cstring1\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n }\n ],\n \"rowCount\": 5.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"AND\",\n \"operands\": [\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 7,\n \"name\": \"$7\"\n 
},\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `cint`, `cbigint`, `cstring1`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`) AS `t`\nLEFT JOIN (SELECT `cbigint`, `cstring2`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cbigint` IS NOT NULL AND `cstring2` IS NOT NULL) AS `t1` ON `t`.`cstring2` = `t1`.`cstring2` AND `t`.`cbigint` = `t1`.`cbigint`\nLEFT JOIN (SELECT `cint`, `cstring1`\nFROM `default`.`small_alltypesorc_a_n1`\nWHERE `cint` IS NOT NULL AND `cstring1` IS NOT NULL) AS `t3` ON `t`.`cstring1` = `t3`.`cstring1` AND `t`.`cint` = `t3`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cbigint","cstring2"],"database:":"default","filterExpr:":"(cbigint is not null and cstring2 is not null) (type: boolean)","Statistics:":"Num rows: 20 Data size: 1616 Basic stats: COMPLETE Column stats: 
COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"(cbigint is not null and cstring2 is not null) (type: boolean)","Statistics:":"Num rows: 11 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cbigint (type: bigint), cstring2 (type: string)","columnExprMap:":{"_col0":"cbigint","_col1":"cstring2"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 11 Data size: 909 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["cint","cstring1"],"database:":"default","filterExpr:":"(cint is not null and cstring1 is not null) (type: boolean)","Statistics:":"Num rows: 20 Data size: 1034 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"(cint is not null and cstring1 is not null) (type: boolean)","Statistics:":"Num rows: 5 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"cint (type: int), cstring1 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cstring1"},"outputColumnNames:":["_col0","_col1"],"Statistics:":"Num rows: 5 Data size: 282 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["cint","cbigint","cstring1","cstring2"],"database:":"default","Statistics:":"Num 
rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_a_n1","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"cint (type: int), cbigint (type: bigint), cstring1 (type: string), cstring2 (type: string)","columnExprMap:":{"_col0":"cint","_col1":"cbigint","_col2":"cstring1","_col3":"cstring2"},"outputColumnNames:":["_col0","_col1","_col2","_col3"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[2, 3, 6, 7]"},"Statistics:":"Num rows: 20 Data size: 2650 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col2":"0:_col2"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: bigint), _col3 (type: string)","1":"_col0 (type: bigint), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 3:bigint","col 7:string"],"bigTableValueExpressions:":["col 2:int","col 6:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0","_col2"],"Statistics:":"Num rows: 26 Data size: 1598 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join 
Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: int), _col2 (type: string)","1":"_col0 (type: int), _col1 (type: string)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:int","col 1:string"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 42 Data size: 336 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map 
Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[2, 3, 6, 7]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} PREHOOK: query: select count(*) from (select c.cstring1 from small_alltypesorc_a_n1 c left outer join small_alltypesorc_a_n1 cd diff --git a/ql/src/test/results/clientpositive/vector_outer_join4.q.out b/ql/src/test/results/clientpositive/vector_outer_join4.q.out index 
d4bf60a755..65c1ecd4b5 100644 --- a/ql/src/test/results/clientpositive/vector_outer_join4.q.out +++ b/ql/src/test/results/clientpositive/vector_outer_join4.q.out @@ -262,7 +262,7 @@ left outer join small_alltypesorc_b cd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 139.86666666666667,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": 
true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\",\n \"csmallint\",\n \"cint\",\n \"cbigint\",\n \"cfloat\",\n \"cdouble\",\n \"cstring1\",\n \"cstring2\",\n \"ctimestamp1\",\n \"ctimestamp2\",\n \"cboolean1\",\n \"cboolean2\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 4,\n \"name\": \"$4\"\n },\n {\n \"input\": 5,\n \"name\": \"$5\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n },\n {\n \"input\": 8,\n \"name\": \"$8\"\n },\n {\n \"input\": 9,\n \"name\": \"$9\"\n },\n {\n \"input\": 10,\n \"name\": \"$10\"\n },\n {\n \"input\": 11,\n \"name\": \"$11\"\n 
}\n ],\n \"rowCount\": 30.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 139.86666666666667,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n 
\"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\",\n \"csmallint\",\n \"cint\",\n \"cbigint\",\n \"cfloat\",\n \"cdouble\",\n \"cstring1\",\n \"cstring2\",\n \"ctimestamp1\",\n \"ctimestamp2\",\n \"cboolean1\",\n \"cboolean2\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 4,\n \"name\": \"$4\"\n },\n {\n \"input\": 5,\n \"name\": \"$5\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n },\n {\n \"input\": 8,\n \"name\": \"$8\"\n },\n {\n \"input\": 9,\n \"name\": \"$9\"\n },\n {\n \"input\": 10,\n \"name\": \"$10\"\n },\n {\n \"input\": 11,\n \"name\": \"$11\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"5\",\n 
\"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 14,\n \"name\": \"$14\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 46.15384615384615\n }\n ]\n}","optimizedSQL":"SELECT *\nFROM (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`\nWHERE `cint` IS NOT NULL) AS `t1` ON `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","filterExpr:":"cint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 4864 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_10","children":{"Select 
Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 20 Data size: 4864 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_12"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: 
timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 7296 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_14","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col10":"0:_col10","_col11":"0:_col11","_col12":"1:_col0","_col13":"1:_col1","_col14":"1:_col2","_col15":"1:_col3","_col16":"1:_col4","_col17":"1:_col5","_col18":"1:_col6","_col19":"1:_col7","_col2":"0:_col2","_col20":"1:_col8","_col21":"1:_col9","_col22":"1:_col10","_col23":"1:_col11","_col3":"0:_col3","_col4":"0:_col4","_col5":"0:_col5","_col6":"0:_col6","_col7":"0:_col7","_col8":"0:_col8","_col9":"0:_col9"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint","col 1:smallint","col 2:int","col 3:bigint","col 4:float","col 5:double","col 6:string","col 7:string","col 8:timestamp","col 9:timestamp","col 10:boolean","col 11:boolean"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN 
[tez, spark] IS false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 76 Data size: 34428 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_15","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 76 Data size: 34428 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_16"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": 
\"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 139.86666666666667,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n 
\"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\",\n \"csmallint\",\n \"cint\",\n \"cbigint\",\n \"cfloat\",\n \"cdouble\",\n \"cstring1\",\n \"cstring2\",\n \"ctimestamp1\",\n \"ctimestamp2\",\n \"cboolean1\",\n \"cboolean2\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 4,\n \"name\": \"$4\"\n },\n {\n \"input\": 5,\n \"name\": \"$5\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n },\n {\n \"input\": 8,\n \"name\": \"$8\"\n },\n {\n \"input\": 9,\n \"name\": \"$9\"\n },\n {\n \"input\": 10,\n \"name\": \"$10\"\n },\n {\n \"input\": 11,\n \"name\": \"$11\"\n }\n ],\n \"rowCount\": 30.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 
139.86666666666667,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n 
\"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\",\n \"csmallint\",\n \"cint\",\n \"cbigint\",\n \"cfloat\",\n \"cdouble\",\n \"cstring1\",\n \"cstring2\",\n \"ctimestamp1\",\n \"ctimestamp2\",\n \"cboolean1\",\n \"cboolean2\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 4,\n \"name\": \"$4\"\n },\n {\n \"input\": 5,\n \"name\": \"$5\"\n },\n {\n \"input\": 6,\n \"name\": \"$6\"\n },\n {\n \"input\": 7,\n \"name\": \"$7\"\n },\n {\n \"input\": 8,\n \"name\": \"$8\"\n },\n {\n \"input\": 9,\n \"name\": \"$9\"\n },\n {\n \"input\": 10,\n \"name\": \"$10\"\n },\n {\n \"input\": 11,\n \"name\": \"$11\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 14,\n \"name\": \"$14\"\n },\n {\n \"input\": 2,\n 
\"name\": \"$2\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 46.15384615384615\n }\n ]\n}","optimizedSQL":"SELECT *\nFROM (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`, `csmallint`, `cint`, `cbigint`, `cfloat`, `cdouble`, `cstring1`, `cstring2`, `ctimestamp1`, `ctimestamp2`, `cboolean1`, `cboolean2`\nFROM `default`.`small_alltypesorc_b`\nWHERE `cint` IS NOT NULL) AS `t1` ON `t`.`cint` = `t1`.`cint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","filterExpr:":"cint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 6896 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 4624 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_10","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), 
ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Statistics:":"Num rows: 20 Data size: 4624 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"OperatorId:":"HASHTABLESINK_12"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","csmallint","cint","cbigint","cfloat","cdouble","cstring1","cstring2","ctimestamp1","ctimestamp2","cboolean1","cboolean2"],"database:":"default","Statistics:":"Num rows: 30 Data size: 6896 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int), cbigint (type: bigint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), cstring2 (type: string), ctimestamp1 (type: timestamp), ctimestamp2 (type: timestamp), cboolean1 (type: boolean), cboolean2 (type: 
boolean)","columnExprMap:":{"_col0":"ctinyint","_col1":"csmallint","_col10":"cboolean1","_col11":"cboolean2","_col2":"cint","_col3":"cbigint","_col4":"cfloat","_col5":"cdouble","_col6":"cstring1","_col7":"cstring2","_col8":"ctimestamp1","_col9":"ctimestamp2"},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]"},"Statistics:":"Num rows: 30 Data size: 6896 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_14","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0","_col1":"0:_col1","_col10":"0:_col10","_col11":"0:_col11","_col12":"1:_col0","_col13":"1:_col1","_col14":"1:_col2","_col15":"1:_col3","_col16":"1:_col4","_col17":"1:_col5","_col18":"1:_col6","_col19":"1:_col7","_col2":"0:_col2","_col20":"1:_col8","_col21":"1:_col9","_col22":"1:_col10","_col23":"1:_col11","_col3":"0:_col3","_col4":"0:_col4","_col5":"0:_col5","_col6":"0:_col6","_col7":"0:_col7","_col8":"0:_col8","_col9":"0:_col9"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col2 (type: int)","1":"_col2 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint","col 1:smallint","col 2:int","col 3:bigint","col 4:float","col 5:double","col 6:string","col 7:string","col 8:timestamp","col 9:timestamp","col 10:boolean","col 11:boolean"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS 
false"]},"outputColumnNames:":["_col0","_col1","_col2","_col3","_col4","_col5","_col6","_col7","_col8","_col9","_col10","_col11","_col12","_col13","_col14","_col15","_col16","_col17","_col18","_col19","_col20","_col21","_col22","_col23"],"Statistics:":"Num rows: 76 Data size: 33748 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_15","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 76 Data size: 33748 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_16"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[bigint, bigint, bigint, bigint, double, double, string, string, timestamp, timestamp, bigint, bigint]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} PREHOOK: query: select * from small_alltypesorc_b c left outer join small_alltypesorc_b cd @@ -347,7 +347,7 @@ left outer 
join small_alltypesorc_b hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": 
\"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 30.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n 
\"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": 
\"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 600.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 600.0\n }\n ]\n}","optimizedSQL":"SELECT `t`.`ctinyint`\nFROM (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`\nWHERE `ctinyint` IS NOT NULL) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","filterExpr:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE 
Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_10","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_12"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0]"},"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_14","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"bigTableValueExpressions:":["col 
0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 600 Data size: 2364 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_15","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 600 Data size: 2364 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_16"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} +{"CBOPlan":"{\n 
\"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": 
\"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 30.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": 
\"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": 
\"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 1,\n \"name\": \"$1\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 600.0\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 600.0\n }\n ]\n}","optimizedSQL":"SELECT `t`.`ctinyint`\nFROM (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`\nWHERE `ctinyint` IS NOT NULL) AS `t1` ON `t`.`ctinyint` = `t1`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-4":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-4"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-4":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","filterExpr:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 84 Basic 
stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_10","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_12"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0]"},"Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_14","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"bigTableValueExpressions:":["col 
0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 600 Data size: 2364 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_15","children":{"File Output Operator":{"compressed:":"false","File Sink Vectorization:":{"className:":"VectorFileSinkOperator","native:":"false"},"Statistics:":"Num rows: 600 Data size: 2364 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_16"}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_17"}}}}}} PREHOOK: query: select 
c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b hd @@ -794,7 +794,7 @@ left outer join small_alltypesorc_b hd POSTHOOK: type: QUERY POSTHOOK: Input: default@small_alltypesorc_b #### A masked pattern was here #### -{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": 
\"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\",\n \"cint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ],\n \"rowCount\": 30.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n 
},\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n 
\"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 46.15384615384615\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": 
\"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n 
},\n \"rowCount\": 20.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 923.0769230769231\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`\nWHERE `cint` IS NOT NULL) AS `t1` ON `t`.`cint` = `t1`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`\nWHERE `ctinyint` IS NOT NULL) AS `t3` ON `t`.`ctinyint` = `t3`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","filterExpr:":"cint is not null (type: 
boolean)","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","filterExpr:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 
1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 58 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No 
nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1160 Data size: 9280 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 
2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} +{"CBOPlan":"{\n \"rels\": [\n {\n \"id\": \"0\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"c\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 8.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n 
},\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n 
\"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"1\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\",\n \"cint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n },\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ],\n \"rowCount\": 30.0\n },\n {\n \"id\": \"2\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"cd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n 
\"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"3\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"4\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"cint\"\n ],\n \"exprs\": [\n {\n \"input\": 2,\n \"name\": \"$2\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"5\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n 
{\n \"input\": 2,\n \"name\": \"$2\"\n },\n {\n \"input\": 1,\n \"name\": \"$1\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"1\",\n \"4\"\n ],\n \"rowCount\": 46.15384615384615\n },\n {\n \"id\": \"6\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveTableScan\",\n \"table\": [\n \"default\",\n \"small_alltypesorc_b\"\n ],\n \"table:alias\": \"hd\",\n \"inputs\": [],\n \"rowCount\": 30.0,\n \"avgRowSize\": 4.0,\n \"rowType\": [\n {\n \"type\": \"TINYINT\",\n \"nullable\": true,\n \"name\": \"ctinyint\"\n },\n {\n \"type\": \"SMALLINT\",\n \"nullable\": true,\n \"name\": \"csmallint\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"cint\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"cbigint\"\n },\n {\n \"type\": \"FLOAT\",\n \"nullable\": true,\n \"name\": \"cfloat\"\n },\n {\n \"type\": \"DOUBLE\",\n \"nullable\": true,\n \"name\": \"cdouble\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring1\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"cstring2\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp1\"\n },\n {\n \"type\": \"TIMESTAMP\",\n \"nullable\": true,\n \"precision\": 9,\n \"name\": \"ctimestamp2\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean1\"\n },\n {\n \"type\": \"BOOLEAN\",\n \"nullable\": true,\n \"name\": \"cboolean2\"\n },\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"BLOCK__OFFSET__INSIDE__FILE\"\n },\n {\n \"type\": \"VARCHAR\",\n \"nullable\": true,\n \"precision\": 2147483647,\n \"name\": \"INPUT__FILE__NAME\"\n },\n {\n \"fields\": [\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"writeid\"\n },\n {\n \"type\": \"INTEGER\",\n \"nullable\": true,\n \"name\": \"bucketid\"\n 
},\n {\n \"type\": \"BIGINT\",\n \"nullable\": true,\n \"name\": \"rowid\"\n }\n ],\n \"name\": \"ROW__ID\"\n }\n ],\n \"colStats\": [\n {\n \"name\": \"ctinyint\",\n \"ndv\": 1,\n \"minValue\": -64,\n \"maxValue\": -64\n },\n {\n \"name\": \"csmallint\",\n \"ndv\": 21,\n \"minValue\": -16379,\n \"maxValue\": -200\n },\n {\n \"name\": \"cint\",\n \"ndv\": 13,\n \"minValue\": -894716315,\n \"maxValue\": 626923679\n },\n {\n \"name\": \"cbigint\",\n \"ndv\": 13,\n \"minValue\": -1887561756,\n \"maxValue\": 2118653994\n },\n {\n \"name\": \"cfloat\",\n \"ndv\": 1,\n \"minValue\": -64.0,\n \"maxValue\": -64.0\n },\n {\n \"name\": \"cdouble\",\n \"ndv\": 21,\n \"minValue\": -16379.0,\n \"maxValue\": -200.0\n },\n {\n \"name\": \"cstring1\",\n \"ndv\": 13\n },\n {\n \"name\": \"cstring2\",\n \"ndv\": 13\n },\n {\n \"name\": \"ctimestamp1\",\n \"ndv\": 0,\n \"minValue\": -28813,\n \"maxValue\": -28788\n },\n {\n \"name\": \"ctimestamp2\",\n \"ndv\": 0,\n \"minValue\": -28816,\n \"maxValue\": -28785\n },\n {\n \"name\": \"cboolean1\",\n \"ndv\": 2\n },\n {\n \"name\": \"cboolean2\",\n \"ndv\": 2\n }\n ]\n },\n {\n \"id\": \"7\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveFilter\",\n \"condition\": {\n \"op\": \"IS NOT NULL\",\n \"operands\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"rowCount\": 20.0\n },\n {\n \"id\": \"8\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveProject\",\n \"fields\": [\n \"ctinyint\"\n ],\n \"exprs\": [\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ],\n \"rowCount\": 20.0\n },\n {\n \"id\": \"9\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveJoin\",\n \"condition\": {\n \"op\": \"=\",\n \"operands\": [\n {\n \"input\": 3,\n \"name\": \"$3\"\n },\n {\n \"input\": 0,\n \"name\": \"$0\"\n }\n ]\n },\n \"joinType\": \"left\",\n \"algorithm\": \"none\",\n \"cost\": \"not available\",\n \"inputs\": [\n \"5\",\n \"8\"\n ],\n \"rowCount\": 
923.0769230769231\n },\n {\n \"id\": \"10\",\n \"relOp\": \"org.apache.hadoop.hive.ql.optimizer.calcite.reloperators.HiveAggregate\",\n \"group\": [],\n \"aggs\": [\n {\n \"agg\": \"count\",\n \"type\": {\n \"type\": \"BIGINT\",\n \"nullable\": true\n },\n \"distinct\": false,\n \"operands\": []\n }\n ],\n \"rowCount\": 1.0\n }\n ]\n}","optimizedSQL":"SELECT COUNT(*) AS `$f0`\nFROM (SELECT `ctinyint`, `cint`\nFROM `default`.`small_alltypesorc_b`) AS `t`\nLEFT JOIN (SELECT `cint`\nFROM `default`.`small_alltypesorc_b`\nWHERE `cint` IS NOT NULL) AS `t1` ON `t`.`cint` = `t1`.`cint`\nLEFT JOIN (SELECT `ctinyint`\nFROM `default`.`small_alltypesorc_b`\nWHERE `ctinyint` IS NOT NULL) AS `t3` ON `t`.`ctinyint` = `t3`.`ctinyint`","PLAN VECTORIZATION":{"enabled":true,"enabledConditionsMet":["hive.vectorized.execution.enabled IS true"]},"cboInfo":"Plan optimized by CBO.","STAGE DEPENDENCIES":{"Stage-8":{"ROOT STAGE":"TRUE"},"Stage-3":{"DEPENDENT STAGES":"Stage-8"},"Stage-0":{"DEPENDENT STAGES":"Stage-3"}},"STAGE PLANS":{"Stage-8":{"Map Reduce Local Work":{"Alias -> Map Local Tables:":{"$hdt$_1:cd":{"Fetch Operator":{"limit:":"-1"}},"$hdt$_2:hd":{"Fetch Operator":{"limit:":"-1"}}},"Alias -> Map Local Operator Tree:":{"$hdt$_1:cd":{"TableScan":{"alias:":"cd","columns:":["cint"],"database:":"default","filterExpr:":"cint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_2","children":{"Filter Operator":{"predicate:":"cint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_20","children":{"Select Operator":{"expressions:":"cint (type: int)","columnExprMap:":{"_col0":"cint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_4","children":{"HashTable Sink 
Operator":{"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"OperatorId:":"HASHTABLESINK_30"}}}}}}}},"$hdt$_2:hd":{"TableScan":{"alias:":"hd","columns:":["ctinyint"],"database:":"default","filterExpr:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 30 Data size: 84 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","isTempTable:":"false","OperatorId:":"TS_5","children":{"Filter Operator":{"predicate:":"ctinyint is not null (type: boolean)","Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"FIL_21","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint)","columnExprMap:":{"_col0":"ctinyint"},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 20 Data size: 56 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"SEL_7","children":{"HashTable Sink Operator":{"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"OperatorId:":"HASHTABLESINK_28"}}}}}}}}}}},"Stage-3":{"Map Reduce":{"Map Operator Tree:":[{"TableScan":{"alias:":"c","columns:":["ctinyint","cint"],"database:":"default","Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE Column stats: COMPLETE","table:":"small_alltypesorc_b","TableScan Vectorization:":{"native:":"true","vectorizationSchemaColumns:":"[0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct]"},"isTempTable:":"false","OperatorId:":"TS_0","children":{"Select Operator":{"expressions:":"ctinyint (type: tinyint), cint (type: int)","columnExprMap:":{"_col0":"ctinyint","_col1":"cint"},"outputColumnNames:":["_col0","_col1"],"Select Vectorization:":{"className:":"VectorSelectOperator","native:":"true","projectedOutputColumnNums:":"[0, 2]"},"Statistics:":"Num rows: 30 Data size: 168 Basic stats: COMPLETE 
Column stats: COMPLETE","OperatorId:":"SEL_32","children":{"Map Join Operator":{"columnExprMap:":{"_col0":"0:_col0"},"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col1 (type: int)","1":"_col0 (type: int)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 2:int"],"bigTableValueExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"outputColumnNames:":["_col0"],"Statistics:":"Num rows: 58 Data size: 196 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_33","children":{"Map Join Operator":{"condition map:":[{"":"Left Outer Join 0 to 1"}],"keys:":{"0":"_col0 (type: tinyint)","1":"_col0 (type: tinyint)"},"Map Join Vectorization:":{"bigTableKeyExpressions:":["col 0:tinyint"],"className:":"VectorMapJoinOperator","native:":"false","nativeConditionsMet:":["hive.mapjoin.optimized.hashtable IS true","hive.vectorized.execution.mapjoin.native.enabled IS true","One MapJoin Condition IS true","No nullsafe IS true","Small table vectorizes IS true","Outer Join has keys IS true","Optimized Table and Supports Key Types IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1160 Data size: 9280 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"MAPJOIN_34","children":{"Group By Operator":{"aggregations:":["count()"],"Group By Vectorization:":{"aggregators:":["VectorUDAFCountStar(*) -> 
bigint"],"className:":"VectorGroupByOperator","groupByMode:":"HASH","native:":"false","vectorProcessingMode:":"HASH","projectedOutputColumnNums:":"[0]"},"minReductionHashAggr:":"0.99","mode:":"hash","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_35","children":{"Reduce Output Operator":{"columnExprMap:":{"VALUE._col0":"_col0"},"sort order:":"","Reduce Sink Vectorization:":{"className:":"VectorReduceSinkOperator","native:":"false","nativeConditionsMet:":["hive.vectorized.execution.reducesink.new.enabled IS true","No PTF TopN IS true","No DISTINCT columns IS true","BinarySortableSerDe for keys IS true","LazyBinarySerDe for values IS true"],"nativeConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","value expressions:":"_col0 (type: bigint)","OperatorId:":"RS_36"}}}}}}}}}}}}],"Execution mode:":"vectorized","Map Vectorization:":{"enabled:":"true","enabledConditionsMet:":["hive.vectorized.use.vectorized.input.format IS true"],"inputFormatFeatureSupport:":"[DECIMAL_64]","featureSupportInUse:":"[DECIMAL_64]","inputFileFormats:":["org.apache.hadoop.hive.ql.io.orc.OrcInputFormat"],"allNative:":"false","usesVectorUDFAdaptor:":"false","vectorized:":"true","rowBatchContext:":{"dataColumnCount:":"12","includeColumns:":"[0, 2]","dataColumns:":["ctinyint:tinyint","csmallint:smallint","cint:int","cbigint:bigint","cfloat:float","cdouble:double","cstring1:string","cstring2:string","ctimestamp1:timestamp","ctimestamp2:timestamp","cboolean1:boolean","cboolean2:boolean"],"partitionColumnCount:":"0","scratchColumnTypeNames:":"[]"}},"Local Work:":{"Map Reduce Local Work":{}},"Reduce Vectorization:":{"enabled:":"false","enableConditionsMet:":["hive.vectorized.execution.reduce.enabled IS true"],"enableConditionsNotMet:":["hive.execution.engine mr IN [tez, spark] IS false"]},"Reduce Operator 
Tree:":{"Group By Operator":{"aggregations:":["count(VALUE._col0)"],"mode:":"mergepartial","outputColumnNames:":["_col0"],"Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","OperatorId:":"GBY_17","children":{"File Output Operator":{"compressed:":"false","Statistics:":"Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE","table:":{"input format:":"org.apache.hadoop.mapred.SequenceFileInputFormat","output format:":"org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat","serde:":"org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe"},"OperatorId:":"FS_19"}}}}}},"Stage-0":{"Fetch Operator":{"limit:":"-1","Processor Tree:":{"ListSink":{"OperatorId:":"LIST_SINK_37"}}}}}} PREHOOK: query: select count(*) from (select c.ctinyint from small_alltypesorc_b c left outer join small_alltypesorc_b cd diff --git a/ql/src/test/results/clientpositive/vector_udf_trunc.q.out b/ql/src/test/results/clientpositive/vector_udf_trunc.q.out index a6ce91f50f..222093705d 100644 --- a/ql/src/test/results/clientpositive/vector_udf_trunc.q.out +++ b/ql/src/test/results/clientpositive/vector_udf_trunc.q.out @@ -97,7 +97,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -219,7 +219,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: 
COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -341,7 +341,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc - Statistics: Num rows: 12288 Data size: 491520 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 366960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] diff --git a/ql/src/test/results/clientpositive/vectorization_10.q.out b/ql/src/test/results/clientpositive/vectorization_10.q.out index d13536b560..601e401fe1 100644 --- a/ql/src/test/results/clientpositive/vectorization_10.q.out +++ b/ql/src/test/results/clientpositive/vectorization_10.q.out @@ -65,7 +65,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 
11:cboolean2:boolean, 12:ROW__ID:struct] @@ -75,7 +75,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringGroupColLessEqualStringScalar(col 7:string, val 10), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterDecimalColLessEqualDecimalScalar(col 14:decimal(6,2), val -5638.15)(children: CastLongToDecimal(col 0:tinyint) -> 14:decimal(6,2))), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 6981.0), FilterExprOrExpr(children: FilterDecimalColEqualDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 1:smallint) -> 15:decimal(11,4)), FilterStringColLikeStringScalar(col 6:string, pattern %a)))) predicate: ((cstring2 <= '10') or ((UDFToDouble(ctinyint) > cdouble) and (CAST( ctinyint AS decimal(6,2)) <= -5638.15)) or ((cdouble > 6981.0D) and ((CAST( csmallint AS decimal(11,4)) = 9763215.5639) or (cstring1 like '%a')))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cdouble (type: double), ctimestamp1 (type: timestamp), ctinyint (type: tinyint), cboolean1 (type: boolean), cstring1 (type: string), (- cdouble) (type: double), (cdouble + UDFToDouble(csmallint)) (type: double), ((cdouble + UDFToDouble(csmallint)) % 33.0D) (type: double), (- cdouble) (type: double), (UDFToDouble(ctinyint) % cdouble) (type: double), (UDFToShort(ctinyint) % csmallint) (type: smallint), (- cdouble) (type: double), (cbigint * UDFToLong((UDFToShort(ctinyint) % csmallint))) (type: bigint), (9763215.5639D - (cdouble + UDFToDouble(csmallint))) (type: double), (- (- cdouble)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 
@@ -84,13 +84,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 8, 0, 10, 6, 16, 18, 21, 22, 24, 25, 26, 28, 31, 33] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 16:double, DoubleColAddDoubleColumn(col 5:double, col 17:double)(children: CastLongToDouble(col 1:smallint) -> 17:double) -> 18:double, DoubleColModuloDoubleScalar(col 20:double, val 33.0)(children: DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: CastLongToDouble(col 1:smallint) -> 19:double) -> 20:double) -> 21:double, DoubleColUnaryMinus(col 5:double) -> 22:double, DoubleColModuloDoubleColumn(col 23:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double) -> 24:double, LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 25:smallint, DoubleColUnaryMinus(col 5:double) -> 26:double, LongColMultiplyLongColumn(col 3:bigint, col 27:bigint)(children: LongColModuloLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint) -> 27:smallint) -> 28:bigint, DoubleScalarSubtractDoubleColumn(val 9763215.5639, col 30:double)(children: DoubleColAddDoubleColumn(col 5:double, col 29:double)(children: CastLongToDouble(col 1:smallint) -> 29:double) -> 30:double) -> 31:double, DoubleColUnaryMinus(col 32:double)(children: DoubleColUnaryMinus(col 5:double) -> 32:double) -> 33:double - Statistics: Num rows: 12288 Data size: 2434654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 12288 Data size: 2434654 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2310094 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
a/ql/src/test/results/clientpositive/vectorization_11.q.out b/ql/src/test/results/clientpositive/vectorization_11.q.out index 1040d3aefa..14978547ff 100644 --- a/ql/src/test/results/clientpositive/vectorization_11.q.out +++ b/ql/src/test/results/clientpositive/vectorization_11.q.out @@ -47,7 +47,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2381474 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2256914 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -57,7 +57,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: SelectColumnIsNull(col 8:timestamp), FilterStringColLikeStringScalar(col 6:string, pattern %a)), FilterStringGroupColEqualStringGroupColumn(col 7:string, col 6:string)) predicate: ((ctimestamp1 is null and (cstring1 like '%a')) or (cstring2 = cstring1)) (type: boolean) - Statistics: Num rows: 6144 Data size: 1190792 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1414500 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), ctimestamp1 (type: timestamp), (-3728 * UDFToInteger(csmallint)) (type: int), (cdouble - 9763215.5639D) (type: double), (- cdouble) (type: double), ((- cdouble) + 6981.0D) (type: double), (cdouble * -5638.15D) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -66,13 +66,13 @@ STAGE PLANS: native: true 
projectedOutputColumnNums: [6, 10, 5, 8, 13, 14, 15, 17, 18] selectExpressions: LongScalarMultiplyLongColumn(val -3728, col 1:int)(children: col 1:smallint) -> 13:int, DoubleColSubtractDoubleScalar(col 5:double, val 9763215.5639) -> 14:double, DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleColAddDoubleScalar(col 16:double, val 6981.0)(children: DoubleColUnaryMinus(col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleScalar(col 5:double, val -5638.15) -> 18:double - Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1116736 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 953272 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 7701 Data size: 1116736 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_12.q.out b/ql/src/test/results/clientpositive/vectorization_12.q.out index 977372a4a1..c00a6fda34 100644 --- a/ql/src/test/results/clientpositive/vectorization_12.q.out +++ b/ql/src/test/results/clientpositive/vectorization_12.q.out @@ -82,7 +82,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 12288 Data size: 1647554 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1522994 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 
3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -92,7 +92,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %a), FilterExprAndExpr(children: FilterLongColLessEqualLongScalar(col 11:boolean, val 1), FilterLongColGreaterEqualLongColumn(col 3:bigint, col 1:bigint)(children: col 1:smallint))), FilterExprOrExpr(children: FilterLongColGreaterEqualLongColumn(col 10:boolean, col 11:boolean), FilterLongColNotEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint)), SelectColumnIsNull(col 8:timestamp)) predicate: (((cstring1 like '%a') or ((cboolean2 <= 1) and (cbigint >= UDFToLong(csmallint)))) and ((cboolean1 >= cboolean2) or (UDFToShort(ctinyint) <> csmallint)) and ctimestamp1 is null) (type: boolean) - Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cbigint (type: bigint), cboolean1 (type: boolean), cstring1 (type: string), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -101,7 +101,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [3, 10, 6, 5, 13, 16, 17] selectExpressions: CastLongToDouble(col 3:bigint) -> 13:double, DoubleColMultiplyDoubleColumn(col 14:double, col 15:double)(children: CastLongToDouble(col 3:bigint) -> 14:double, CastLongToDouble(col 3:bigint) -> 15:double) -> 16:double, DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 17:double - Statistics: Num rows: 1 Data size: 166 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 1903 Data size: 236052 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col0), sum(_col5), sum(_col4), sum(_col3), count(_col3), sum(_col0), sum(_col6) Group By Vectorization: @@ -116,7 +116,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: double), _col1 (type: bigint), _col2 (type: string), _col3 (type: boolean) sort order: ++++ @@ -126,7 +126,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col4 (type: bigint), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: bigint), _col9 (type: bigint), _col10 (type: double) Execution mode: vectorized Map Vectorization: @@ -154,11 +154,11 @@ STAGE PLANS: keys: KEY._col0 (type: double), KEY._col1 (type: bigint), KEY._col2 (type: string), KEY._col3 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 - Statistics: Num rows: 1 Data size: 170 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 268676 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col1 (type: bigint), _col3 (type: boolean), _col2 (type: string), _col0 (type: double), 
(-6432.0D * _col0) (type: double), (- _col1) (type: bigint), _col4 (type: bigint), (_col1 * _col4) (type: bigint), power(((_col5 - ((_col6 * _col6) / _col4)) / CASE WHEN ((_col4 = 1L)) THEN (null) ELSE ((_col4 - 1)) END), 0.5) (type: double), ((-6432.0D * _col0) / -6432.0D) (type: double), (- ((-6432.0D * _col0) / -6432.0D)) (type: double), (_col7 / _col8) (type: double), (- (-6432.0D * _col0)) (type: double), (-5638.15 + CAST( _col1 AS decimal(19,0))) (type: decimal(22,2)), _col9 (type: bigint), ((_col7 / _col8) / (-6432.0D * _col0)) (type: double), (- (- ((-6432.0D * _col0) / -6432.0D))) (type: double), (((-6432.0D * _col0) / -6432.0D) + (- (-6432.0D * _col0))) (type: double), power(((_col10 - ((_col7 * _col7) / _col8)) / _col8), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col17, _col18, _col19 - Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603604 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -181,7 +181,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 1 Data size: 346 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603604 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: boolean), _col4 (type: double), _col5 (type: bigint), _col6 (type: bigint), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: decimal(22,2)), _col14 (type: bigint), _col15 (type: double), _col17 (type: double), _col18 
(type: double), _col19 (type: double) Execution mode: vectorized Map Vectorization: @@ -207,10 +207,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: bigint), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: string), null (type: timestamp), KEY.reducesinkkey0 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: bigint), VALUE._col3 (type: bigint), VALUE._col4 (type: bigint), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(22,2)), VALUE._col11 (type: bigint), VALUE._col12 (type: double), VALUE._col8 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603644 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1 Data size: 386 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1903 Data size: 603644 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_13.q.out b/ql/src/test/results/clientpositive/vectorization_13.q.out index 3552007f8d..cc01a73bb9 100644 --- a/ql/src/test/results/clientpositive/vectorization_13.q.out +++ b/ql/src/test/results/clientpositive/vectorization_13.q.out @@ -84,7 +84,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> 
-28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1779902 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -94,7 +94,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28789.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28788.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4)))) predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28789.0D) and (UDFToDouble(ctimestamp2) <> -28788.0D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), 
UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -103,7 +103,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -118,7 +118,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -128,7 +128,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data 
size: 122752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -156,11 +156,11 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 346 Data size: 64822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: 
COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -183,7 +183,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -209,13 +209,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic 
stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -421,7 +421,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2028982 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1779902 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -430,7 +430,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val 3569.0), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 10.175), FilterLongColNotEqualLongScalar(col 10:boolean, val 1)), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28801.388)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val -28801.336)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterDecimalColLessDecimalScalar(col 15:decimal(11,4), val 9763215.5639)(children: CastLongToDecimal(col 0:tinyint) -> 15:decimal(11,4)))) predicate: (((cfloat < 3569.0) and (cdouble <= 10.175D) and (cboolean1 <> 1)) or ((UDFToDouble(ctimestamp1) > -28801.388D) and (UDFToDouble(ctimestamp2) <> -28801.336D) and (CAST( ctinyint AS decimal(11,4)) < 9763215.5639))) (type: boolean) - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -439,7 +439,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 0, 8, 4, 6, 4, 16, 17, 20] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 16:double, CastLongToDouble(col 0:tinyint) -> 17:double, DoubleColMultiplyDoubleColumn(col 18:double, col 19:double)(children: CastLongToDouble(col 0:tinyint) -> 18:double, CastLongToDouble(col 0:tinyint) -> 19:double) -> 20:double - Statistics: Num rows: 1386 Data size: 228984 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 1386 Data size: 200984 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(_col1), sum(_col3), sum(_col6), sum(_col5), count(_col3), sum(_col8), sum(_col7), count(_col1), max(_col3), min(_col1) Group By Vectorization: @@ -454,7 +454,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string) sort order: +++++ @@ -464,7 +464,7 @@ STAGE PLANS: native: false nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 693 Data size: 129752 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: tinyint), _col6 (type: double), _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: bigint), _col13 (type: float), _col14 (type: tinyint) Execution mode: vectorized Map Vectorization: @@ -486,11 +486,11 @@ STAGE PLANS: keys: KEY._col0 (type: boolean), KEY._col1 (type: tinyint), KEY._col2 (type: timestamp), KEY._col3 (type: float), KEY._col4 (type: string) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 346 Data size: 64822 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 122752 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: boolean), _col1 (type: tinyint), _col2 (type: timestamp), _col3 (type: float), _col4 (type: string), (- _col1) (type: tinyint), _col5 (type: tinyint), ((- _col1) + _col5) (type: tinyint), _col6 (type: double), (_col6 * UDFToDouble(((- _col1) + _col5))) (type: double), (- _col6) (type: double), (79.553 * _col3) (type: float), power(((_col7 - ((_col8 * _col8) / _col9)) / _col9), 0.5) (type: double), (- _col6) (type: double), power(((_col10 - ((_col11 * _col11) / _col12)) / _col12), 0.5) (type: double), (CAST( ((- _col1) + _col5) AS decimal(3,0)) - 10.175) (type: decimal(7,3)), (- (- _col6)) (type: double), (-26.28D / (- (- _col6))) (type: double), _col13 (type: float), ((_col6 * UDFToDouble(((- _col1) + _col5))) / 
UDFToDouble(_col1)) (type: double), _col14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -512,7 +512,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -532,13 +532,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: tinyint), KEY.reducesinkkey2 (type: timestamp), KEY.reducesinkkey3 (type: float), KEY.reducesinkkey4 (type: string), KEY.reducesinkkey5 (type: tinyint), KEY.reducesinkkey6 (type: tinyint), KEY.reducesinkkey7 (type: tinyint), KEY.reducesinkkey8 (type: double), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: double), KEY.reducesinkkey10 (type: double), KEY.reducesinkkey14 (type: double), KEY.reducesinkkey15 (type: decimal(7,3)), KEY.reducesinkkey16 (type: double), KEY.reducesinkkey17 (type: double), KEY.reducesinkkey18 (type: float), KEY.reducesinkkey19 (type: double), KEY.reducesinkkey20 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, 
_col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 346 Data size: 113262 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 693 Data size: 219772 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 40 - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 40 Data size: 13206 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 40 Data size: 12846 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_14.q.out b/ql/src/test/results/clientpositive/vectorization_14.q.out index c73cf60821..72e7e3b120 100644 --- a/ql/src/test/results/clientpositive/vectorization_14.q.out +++ b/ql/src/test/results/clientpositive/vectorization_14.q.out @@ -84,7 +84,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2139070 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1889990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -94,7 +94,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: 
FilterLongColLessEqualLongColumn(col 0:bigint, col 3:bigint)(children: col 0:tinyint), FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterLongColGreaterLongScalar(col 3:bigint, val -257), FilterDoubleColLessDoubleColumn(col 4:float, col 14:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 14:float)), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleColumn(col 15:double, col 5:double)(children: CastLongToDouble(col 2:int) -> 15:double), FilterTimestampColLessTimestampColumn(col 9:timestamp, col 8:timestamp))) predicate: ((UDFToLong(ctinyint) <= cbigint) and (cdouble < UDFToDouble(ctinyint)) and ((cbigint > -257L) or (cfloat < UDFToFloat(cint))) and ((UDFToDouble(cint) <= cdouble) or (ctimestamp2 < ctimestamp1))) (type: boolean) - Statistics: Num rows: 758 Data size: 132082 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 758 Data size: 116802 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cfloat (type: float), cstring1 (type: string), cboolean1 (type: boolean), cdouble (type: double), (- (-26.28D + cdouble)) (type: double), ((- (-26.28D + cdouble)) * (- (-26.28D + cdouble))) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 @@ -103,7 +103,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 4, 6, 10, 5, 17, 22, 4, 23] selectExpressions: DoubleColUnaryMinus(col 16:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 16:double) -> 17:double, DoubleColMultiplyDoubleColumn(col 19:double, col 21:double)(children: DoubleColUnaryMinus(col 18:double)(children: DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 18:double) -> 19:double, DoubleColUnaryMinus(col 20:double)(children: 
DoubleScalarAddDoubleColumn(val -26.28, col 5:double) -> 20:double) -> 21:double) -> 22:double, DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 23:double - Statistics: Num rows: 758 Data size: 132082 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 758 Data size: 116802 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col6), sum(_col5), count(_col5), max(_col1), sum(_col8), sum(_col7), count(_col1) Group By Vectorization: @@ -118,7 +118,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 379 Data size: 66108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: float), _col2 (type: double), _col3 (type: timestamp), _col4 (type: boolean) sort order: +++++ @@ -128,7 +128,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 379 Data size: 66108 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col5 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -156,11 +156,11 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: float), KEY._col2 (type: double), KEY._col3 (type: timestamp), KEY._col4 (type: boolean) mode: mergepartial outputColumnNames: _col0, _col1, 
_col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 - Statistics: Num rows: 189 Data size: 33008 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 62308 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col3 (type: timestamp), _col1 (type: float), _col0 (type: string), _col4 (type: boolean), _col2 (type: double), (-26.28D + _col2) (type: double), (- (-26.28D + _col2)) (type: double), power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5) (type: double), (_col1 * -26.28) (type: float), _col8 (type: float), (- _col1) (type: float), (- _col8) (type: float), ((- (-26.28D + _col2)) / 10.175D) (type: double), power(((_col9 - ((_col10 * _col10) / _col11)) / _col11), 0.5) (type: double), _col11 (type: bigint), (- ((- (-26.28D + _col2)) / 10.175D)) (type: double), (-1.389D % power(((_col5 - ((_col6 * _col6) / _col7)) / CASE WHEN ((_col7 = 1L)) THEN (null) ELSE ((_col7 - 1)) END), 0.5)) (type: double), (UDFToDouble(_col1) - _col2) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / _col11) (type: double), (((_col9 - ((_col10 * _col10) / _col11)) / _col11) % 10.175D) (type: double), ((_col9 - ((_col10 * _col10) / _col11)) / CASE WHEN ((_col11 = 1L)) THEN (null) ELSE ((_col11 - 1)) END) (type: double), (- (UDFToDouble(_col1) - _col2)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -183,7 +183,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, 
BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: boolean), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: float), _col10 (type: float), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: bigint), _col15 (type: double), _col16 (type: double), _col17 (type: double), _col18 (type: double), _col19 (type: double), _col20 (type: double), _col21 (type: double) Execution mode: vectorized Map Vectorization: @@ -209,10 +209,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey3 (type: timestamp), KEY.reducesinkkey1 (type: float), KEY.reducesinkkey0 (type: string), VALUE._col0 (type: boolean), KEY.reducesinkkey2 (type: double), VALUE._col1 (type: double), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: float), VALUE._col6 (type: float), VALUE._col7 (type: float), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: bigint), VALUE._col11 (type: double), VALUE._col12 (type: double), VALUE._col13 (type: double), VALUE._col14 (type: double), VALUE._col15 (type: double), VALUE._col16 (type: double), VALUE._col17 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21 - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 189 Data size: 45860 Basic stats: COMPLETE Column stats: 
COMPLETE + Statistics: Num rows: 379 Data size: 88080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_15.q.out b/ql/src/test/results/clientpositive/vectorization_15.q.out index 1a5de65593..dbfa0d83a4 100644 --- a/ql/src/test/results/clientpositive/vectorization_15.q.out +++ b/ql/src/test/results/clientpositive/vectorization_15.q.out @@ -80,7 +80,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -90,7 +90,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern 10%), FilterStringColLikeStringScalar(col 7:string, pattern %ss%), FilterExprAndExpr(children: FilterLongColGreaterEqualLongScalar(col 2:int, val -75), FilterLongColEqualLongColumn(col 0:smallint, col 1:smallint)(children: col 0:tinyint), FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -3728.0))) predicate: ((cstring1 like '10%') or (cstring2 like '%ss%') or ((cint >= -75) and (UDFToShort(ctinyint) = csmallint) and (cdouble >= -3728.0D))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cboolean1 (type: boolean), cdouble (type: double), cstring1 (type: string), ctinyint (type: tinyint), cint (type: int), ctimestamp1 (type: timestamp), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 @@ -99,7 +99,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 10, 5, 6, 0, 2, 8, 4, 13, 14, 17, 18, 21] selectExpressions: DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 13:double, CastLongToDouble(col 0:tinyint) -> 14:double, DoubleColMultiplyDoubleColumn(col 15:double, col 16:double)(children: CastLongToDouble(col 0:tinyint) -> 15:double, CastLongToDouble(col 0:tinyint) -> 16:double) -> 17:double, CastLongToDouble(col 2:int) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 2:int) -> 19:double, CastLongToDouble(col 2:int) -> 20:double) -> 21:double - Statistics: Num rows: 12288 Data size: 2491562 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2367002 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col8), sum(_col7), count(_col0), min(_col2), sum(_col10), sum(_col9), count(_col4), sum(_col12), sum(_col11), count(_col5) Group By Vectorization: @@ -114,7 +114,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 6144 Data size: 1278652 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp) sort order: +++++++ @@ -124,7 +124,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6144 Data size: 1278652 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: double), _col9 (type: bigint), _col10 (type: double), _col11 (type: double), _col12 (type: double), _col13 (type: bigint), _col14 (type: double), _col15 (type: double), _col16 (type: bigint) Execution mode: vectorized Map Vectorization: @@ -151,11 +151,11 @@ STAGE PLANS: keys: KEY._col0 (type: float), KEY._col1 (type: boolean), KEY._col2 (type: double), KEY._col3 (type: string), KEY._col4 (type: tinyint), KEY._col5 (type: int), KEY._col6 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - Statistics: Num rows: 3072 Data size: 639332 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1216372 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: float), _col1 (type: boolean), _col2 (type: double), _col3 (type: string), _col4 (type: tinyint), _col5 (type: int), _col6 (type: timestamp), power(((_col7 - ((_col8 * _col8) / _col9)) / CASE WHEN ((_col9 = 1L)) THEN (null) ELSE ((_col9 - 1)) 
END), 0.5) (type: double), (-26.28 - CAST( _col5 AS decimal(10,0))) (type: decimal(13,2)), _col10 (type: double), (_col2 * 79.553D) (type: double), (33.0 % _col0) (type: float), power(((_col11 - ((_col12 * _col12) / _col13)) / CASE WHEN ((_col13 = 1L)) THEN (null) ELSE ((_col13 - 1)) END), 0.5) (type: double), ((_col11 - ((_col12 * _col12) / _col13)) / _col13) (type: double), (-23.0D % _col2) (type: double), (- _col4) (type: tinyint), ((_col14 - ((_col15 * _col15) / _col16)) / CASE WHEN ((_col16 = 1L)) THEN (null) ELSE ((_col16 - 1)) END) (type: double), (UDFToFloat(_col5) - _col0) (type: float), (-23 % UDFToInteger(_col4)) (type: int), (- (-26.28 - CAST( _col5 AS decimal(10,0)))) (type: decimal(13,2)), power(((_col14 - ((_col15 * _col15) / _col16)) / _col16), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false table: @@ -178,7 +178,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col7 (type: double), _col8 (type: decimal(13,2)), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: double), _col13 (type: double), _col14 (type: double), _col15 (type: tinyint), _col16 (type: double), _col17 (type: float), _col18 (type: 
int), _col19 (type: decimal(13,2)), _col20 (type: double) Execution mode: vectorized Map Vectorization: @@ -203,10 +203,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: float), KEY.reducesinkkey1 (type: boolean), KEY.reducesinkkey2 (type: double), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: tinyint), KEY.reducesinkkey5 (type: int), KEY.reducesinkkey6 (type: timestamp), VALUE._col0 (type: double), VALUE._col1 (type: decimal(13,2)), VALUE._col2 (type: double), VALUE._col3 (type: double), VALUE._col4 (type: float), VALUE._col5 (type: double), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: tinyint), VALUE._col9 (type: double), VALUE._col10 (type: float), VALUE._col11 (type: int), VALUE._col12 (type: decimal(13,2)), VALUE._col13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20 - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 3072 Data size: 1327460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2592628 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_16.q.out b/ql/src/test/results/clientpositive/vectorization_16.q.out index 2dfcc775ad..9c782f230b 100644 --- a/ql/src/test/results/clientpositive/vectorization_16.q.out +++ b/ql/src/test/results/clientpositive/vectorization_16.q.out @@ -57,7 +57,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: 
boolean) - Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2183514 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -67,7 +67,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -76,7 +76,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -91,7 +91,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + 
Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -101,7 +101,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -129,14 +129,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1536 Data size: 227586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: 
decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_17.q.out b/ql/src/test/results/clientpositive/vectorization_17.q.out index ea65db132a..68fcd18b25 100644 --- a/ql/src/test/results/clientpositive/vectorization_17.q.out +++ b/ql/src/test/results/clientpositive/vectorization_17.q.out @@ -65,7 +65,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean) - Statistics: Num rows: 12288 Data size: 1647550 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1522990 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -75,7 +75,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: 
FilterLongColGreaterLongScalar(col 3:bigint, val -23), FilterExprOrExpr(children: FilterLongColGreaterEqualLongScalar(col 0:tinyint, val 33), FilterLongColGreaterEqualLongColumn(col 1:bigint, col 3:bigint)(children: col 1:smallint), FilterDoubleColEqualDoubleColumn(col 4:double, col 5:double)(children: col 4:float)), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 5:double, val 988888.0), FilterDecimalColGreaterDecimalScalar(col 13:decimal(13,3), val -863.257)(children: CastLongToDecimal(col 2:int) -> 13:decimal(13,3)))) predicate: ((cbigint > -23L) and ((ctinyint >= 33Y) or (UDFToLong(csmallint) >= cbigint) or (UDFToDouble(cfloat) = cdouble)) and ((cdouble <> 988888.0D) or (CAST( cint AS decimal(13,3)) > -863.257))) (type: boolean) - Statistics: Num rows: 6141 Data size: 823456 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 761216 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cfloat (type: float), cstring1 (type: string), cint (type: int), ctimestamp1 (type: timestamp), cdouble (type: double), cbigint (type: bigint), (UDFToDouble(cfloat) / UDFToDouble(ctinyint)) (type: double), (UDFToLong(cint) % cbigint) (type: bigint), (- cdouble) (type: double), (cdouble + (UDFToDouble(cfloat) / UDFToDouble(ctinyint))) (type: double), (cdouble / UDFToDouble(cint)) (type: double), (- (- cdouble)) (type: double), (9763215.5639 % CAST( cbigint AS decimal(19,0))) (type: decimal(11,4)), (2563.58D + (- (- cdouble))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -84,7 +84,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [4, 6, 2, 8, 5, 3, 15, 16, 17, 20, 22, 24, 26, 29] selectExpressions: DoubleColDivideDoubleColumn(col 4:double, col 14:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 14:double) -> 15:double, LongColModuloLongColumn(col 2:bigint, col 3:bigint)(children: col 2:int) -> 
16:bigint, DoubleColUnaryMinus(col 5:double) -> 17:double, DoubleColAddDoubleColumn(col 5:double, col 19:double)(children: DoubleColDivideDoubleColumn(col 4:double, col 18:double)(children: col 4:float, CastLongToDouble(col 0:tinyint) -> 18:double) -> 19:double) -> 20:double, DoubleColDivideDoubleColumn(col 5:double, col 21:double)(children: CastLongToDouble(col 2:int) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 23:double)(children: DoubleColUnaryMinus(col 5:double) -> 23:double) -> 24:double, DecimalScalarModuloDecimalColumn(val 9763215.5639, col 25:decimal(19,0))(children: CastLongToDecimal(col 3:bigint) -> 25:decimal(19,0)) -> 26:decimal(11,4), DoubleScalarAddDoubleColumn(val 2563.58, col 28:double)(children: DoubleColUnaryMinus(col 27:double)(children: DoubleColUnaryMinus(col 5:double) -> 27:double) -> 28:double) -> 29:double - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col5 (type: bigint), _col0 (type: float) sort order: ++ @@ -93,7 +93,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: string), _col2 (type: int), _col3 (type: timestamp), _col4 (type: double), _col6 (type: double), _col7 (type: bigint), _col8 (type: double), _col9 (type: double), _col10 (type: double), _col11 (type: double), _col12 (type: decimal(11,4)), _col13 (type: double) Execution mode: vectorized Map Vectorization: @@ -119,10 +119,10 
@@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey1 (type: float), VALUE._col0 (type: string), VALUE._col1 (type: int), VALUE._col2 (type: timestamp), VALUE._col3 (type: double), KEY.reducesinkkey0 (type: bigint), VALUE._col4 (type: double), VALUE._col5 (type: bigint), VALUE._col6 (type: double), VALUE._col7 (type: double), VALUE._col8 (type: double), VALUE._col9 (type: double), VALUE._col10 (type: decimal(11,4)), VALUE._col11 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 6141 Data size: 1818460 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6141 Data size: 1756220 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_2.q.out b/ql/src/test/results/clientpositive/vectorization_2.q.out index a5527a4be7..cdb904cbe4 100644 --- a/ql/src/test/results/clientpositive/vectorization_2.q.out +++ b/ql/src/test/results/clientpositive/vectorization_2.q.out @@ -63,7 +63,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2157324 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1908244 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 
2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -73,7 +73,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessDoubleColumn(col 5:double, col 13:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterExprOrExpr(children: FilterDoubleColNotEqualDoubleScalar(col 14:double, val -10669.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), FilterLongColLessLongScalar(col 2:int, val 359))), FilterExprAndExpr(children: FilterTimestampColLessTimestampColumn(col 8:timestamp, col 9:timestamp), FilterStringColLikeStringScalar(col 7:string, pattern b%), FilterDoubleColLessEqualDoubleScalar(col 4:float, val -5638.14990234375))) predicate: (((cdouble < UDFToDouble(ctinyint)) and ((UDFToDouble(ctimestamp2) <> -10669.0D) or (cint < 359))) or ((ctimestamp1 < ctimestamp2) and (cstring2 like 'b%') and (cfloat <= -5638.15))) (type: boolean) - Statistics: Num rows: 4096 Data size: 719232 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 636272 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), cfloat (type: float), cbigint (type: bigint), ctinyint (type: tinyint), cdouble (type: double), UDFToDouble(cbigint) (type: double), (UDFToDouble(cbigint) * UDFToDouble(cbigint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 @@ -82,7 +82,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 4, 3, 0, 5, 15, 18] selectExpressions: CastLongToDouble(col 3:bigint) -> 15:double, DoubleColMultiplyDoubleColumn(col 16:double, col 17:double)(children: CastLongToDouble(col 3:bigint) -> 16:double, CastLongToDouble(col 3:bigint) -> 17:double) -> 18:double - Statistics: Num rows: 4096 Data size: 719232 Basic stats: 
COMPLETE Column stats: COMPLETE + Statistics: Num rows: 4096 Data size: 636272 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col1), sum(_col6), sum(_col5), count(_col2), count(), min(_col3), sum(_col4), count(_col4) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/vectorization_3.q.out b/ql/src/test/results/clientpositive/vectorization_3.q.out index add0a8d345..0a9971b3cb 100644 --- a/ql/src/test/results/clientpositive/vectorization_3.q.out +++ b/ql/src/test/results/clientpositive/vectorization_3.q.out @@ -68,7 +68,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 12288 Data size: 1276620 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1027540 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -78,7 +78,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleColumn(col 13:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 13:float), FilterDecimalColNotEqualDecimalScalar(col 14:decimal(22,3), val 79.553)(children: CastLongToDecimal(col 3:bigint) -> 14:decimal(22,3)), FilterDoubleColEqualDoubleScalar(col 15:double, val -29071.0)(children: CastTimestampToDouble(col 9:timestamp) -> 15:double)), FilterExprAndExpr(children: 
FilterDoubleColGreaterDoubleColumn(col 16:double, col 5:double)(children: CastLongToDouble(col 3:bigint) -> 16:double), FilterDecimalColGreaterEqualDecimalScalar(col 17:decimal(8,3), val 79.553)(children: CastLongToDecimal(col 1:smallint) -> 17:decimal(8,3)), FilterTimestampColGreaterTimestampColumn(col 8:timestamp, col 9:timestamp))) predicate: (((UDFToFloat(cint) <= cfloat) and (CAST( cbigint AS decimal(22,3)) <> 79.553) and (UDFToDouble(ctimestamp2) = -29071.0D)) or ((UDFToDouble(cbigint) > cdouble) and (CAST( csmallint AS decimal(8,3)) >= 79.553) and (ctimestamp1 > ctimestamp2))) (type: boolean) - Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: csmallint (type: smallint), ctinyint (type: tinyint), cfloat (type: float), cint (type: int), UDFToDouble(csmallint) (type: double), (UDFToDouble(csmallint) * UDFToDouble(csmallint)) (type: double), UDFToDouble(ctinyint) (type: double), (UDFToDouble(ctinyint) * UDFToDouble(ctinyint)) (type: double), UDFToDouble(cfloat) (type: double), (UDFToDouble(cfloat) * UDFToDouble(cfloat)) (type: double), UDFToDouble(cint) (type: double), (UDFToDouble(cint) * UDFToDouble(cint)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11 @@ -87,7 +87,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 0, 4, 2, 18, 21, 22, 25, 4, 26, 27, 30] selectExpressions: CastLongToDouble(col 1:smallint) -> 18:double, DoubleColMultiplyDoubleColumn(col 19:double, col 20:double)(children: CastLongToDouble(col 1:smallint) -> 19:double, CastLongToDouble(col 1:smallint) -> 20:double) -> 21:double, CastLongToDouble(col 0:tinyint) -> 22:double, DoubleColMultiplyDoubleColumn(col 23:double, col 24:double)(children: CastLongToDouble(col 0:tinyint) -> 23:double, CastLongToDouble(col 0:tinyint) -> 24:double) -> 25:double, 
DoubleColMultiplyDoubleColumn(col 4:double, col 4:double)(children: col 4:float, col 4:float) -> 26:double, CastLongToDouble(col 2:int) -> 27:double, DoubleColMultiplyDoubleColumn(col 28:double, col 29:double)(children: CastLongToDouble(col 2:int) -> 28:double, CastLongToDouble(col 2:int) -> 29:double) -> 30:double - Statistics: Num rows: 2503 Data size: 260060 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 2503 Data size: 209380 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col5), sum(_col4), count(_col0), sum(_col7), sum(_col6), count(_col1), sum(_col9), sum(_col8), count(_col2), sum(_col2), sum(_col3), count(_col3), sum(_col11), sum(_col10) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/vectorization_5.q.out b/ql/src/test/results/clientpositive/vectorization_5.q.out index 8a1d404903..ef485cb200 100644 --- a/ql/src/test/results/clientpositive/vectorization_5.q.out +++ b/ql/src/test/results/clientpositive/vectorization_5.q.out @@ -57,7 +57,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2454862 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2330342 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -67,7 +67,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 6:string, pattern %b%), SelectColumnIsNotNull(col 11:boolean)), 
FilterExprAndExpr(children: FilterDoubleColEqualDoubleColumn(col 13:double, col 5:double)(children: CastLongToDouble(col 0:tinyint) -> 13:double), FilterStringColLikeStringScalar(col 7:string, pattern a), SelectColumnIsNotNull(col 9:timestamp))) predicate: (((cstring1 like '%b%') and cboolean2 is not null) or ((UDFToDouble(ctinyint) = cdouble) and (cstring2 like 'a') and ctimestamp2 is not null)) (type: boolean) - Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6879 Data size: 1304690 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctinyint (type: tinyint), csmallint (type: smallint), cint (type: int) outputColumnNames: ctinyint, csmallint, cint @@ -75,7 +75,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [0, 1, 2] - Statistics: Num rows: 7658 Data size: 1529972 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6879 Data size: 1304690 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: max(csmallint), count(), min(csmallint), sum(cint), max(ctinyint) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/vectorization_7.q.out b/ql/src/test/results/clientpositive/vectorization_7.q.out index 880a4c3fd1..c27d13f0dd 100644 --- a/ql/src/test/results/clientpositive/vectorization_7.q.out +++ b/ql/src/test/results/clientpositive/vectorization_7.q.out @@ -71,7 +71,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true 
vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -81,7 +81,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28815.0)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0)) predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28815.0D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11033 Data size: 2711364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 2487724 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- 
ctinyint) % ctinyint) (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -90,7 +90,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -99,7 +99,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 11033 
Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -125,13 +125,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -297,7 +297,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) 
(type: boolean) - Statistics: Num rows: 12288 Data size: 3019778 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2770698 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -306,7 +306,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterExprOrExpr(children: FilterDoubleColGreaterDoubleScalar(col 5:double, val 988888.0), FilterExprAndExpr(children: FilterDoubleColGreaterDoubleScalar(col 13:double, val -28792.315)(children: CastTimestampToDouble(col 9:timestamp) -> 13:double), FilterDoubleColLessEqualDoubleScalar(col 5:double, val 3569.0))), FilterExprOrExpr(children: FilterDoubleColLessEqualDoubleScalar(col 14:double, val -28800.0)(children: CastTimestampToDouble(col 8:timestamp) -> 14:double), FilterLongColEqualLongColumn(col 0:int, col 2:int)(children: col 0:tinyint), FilterStringColLikeStringScalar(col 7:string, pattern ss)), FilterLongColNotEqualLongScalar(col 0:tinyint, val 0)) predicate: (((cdouble > 988888.0D) or ((UDFToDouble(ctimestamp2) > -28792.315D) and (cdouble <= 3569.0D))) and ((UDFToDouble(ctimestamp1) <= -28800.0D) or (UDFToInteger(ctinyint) = cint) or (cstring2 like 'ss')) and (ctinyint <> 0Y)) (type: boolean) - Statistics: Num rows: 11033 Data size: 2711364 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 2487724 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cboolean1 (type: boolean), cbigint (type: bigint), csmallint (type: smallint), ctinyint (type: tinyint), ctimestamp1 (type: timestamp), cstring1 (type: string), (cbigint + cbigint) (type: bigint), (UDFToInteger(csmallint) % -257) (type: int), (- csmallint) (type: smallint), (- ctinyint) (type: tinyint), (UDFToInteger((- ctinyint)) + 17) (type: int), (cbigint * UDFToLong((- csmallint))) (type: bigint), (cint % UDFToInteger(csmallint)) (type: int), (- ctinyint) (type: tinyint), ((- ctinyint) % ctinyint) (type: 
tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 @@ -315,7 +315,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [10, 3, 1, 0, 8, 6, 15, 16, 17, 18, 20, 22, 23, 24, 26] selectExpressions: LongColAddLongColumn(col 3:bigint, col 3:bigint) -> 15:bigint, LongColModuloLongScalar(col 1:int, val -257)(children: col 1:smallint) -> 16:int, LongColUnaryMinus(col 1:smallint) -> 17:smallint, LongColUnaryMinus(col 0:tinyint) -> 18:tinyint, LongColAddLongScalar(col 19:int, val 17)(children: LongColUnaryMinus(col 0:tinyint) -> 19:tinyint) -> 20:int, LongColMultiplyLongColumn(col 3:bigint, col 21:bigint)(children: LongColUnaryMinus(col 1:smallint) -> 21:smallint) -> 22:bigint, LongColModuloLongColumn(col 2:int, col 1:int)(children: col 1:smallint) -> 23:int, LongColUnaryMinus(col 0:tinyint) -> 24:tinyint, LongColModuloLongColumn(col 25:tinyint, col 0:tinyint)(children: LongColUnaryMinus(col 0:tinyint) -> 25:tinyint) -> 26:tinyint - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: boolean), _col1 (type: bigint), _col2 (type: smallint), _col3 (type: tinyint), _col4 (type: timestamp), _col5 (type: string), _col6 (type: bigint), _col7 (type: int), _col8 (type: smallint), _col9 (type: tinyint), _col10 (type: int), _col11 (type: bigint), _col12 (type: int), _col13 (type: tinyint), _col14 (type: tinyint) sort order: +++++++++++++++ @@ -324,7 +324,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 11033 Data size: 1865892 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -344,13 +344,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: boolean), KEY.reducesinkkey1 (type: bigint), KEY.reducesinkkey2 (type: smallint), KEY.reducesinkkey3 (type: tinyint), KEY.reducesinkkey4 (type: timestamp), KEY.reducesinkkey5 (type: string), KEY.reducesinkkey6 (type: bigint), KEY.reducesinkkey7 (type: int), KEY.reducesinkkey8 (type: smallint), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey10 (type: int), KEY.reducesinkkey11 (type: bigint), KEY.reducesinkkey12 (type: int), KEY.reducesinkkey9 (type: tinyint), KEY.reducesinkkey14 (type: tinyint) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14 - Statistics: Num rows: 11033 Data size: 1865892 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 11033 Data size: 1754052 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 25 - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 25 Data size: 4380 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 25 Data size: 4180 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorization_8.q.out b/ql/src/test/results/clientpositive/vectorization_8.q.out index 847064bc2a..d5f635f81b 100644 --- a/ql/src/test/results/clientpositive/vectorization_8.q.out +++ b/ql/src/test/results/clientpositive/vectorization_8.q.out @@ -67,7 +67,7 @@ STAGE PLANS: 
TableScan alias: alltypesorc filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2733998 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -77,7 +77,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 10.0)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 16.0)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean))) predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 10.0D) and (UDFToDouble(ctimestamp2) <> 16.0D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 680930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), 
(UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -86,7 +86,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), 
_col13 (type: double) sort order: ++++++++++++++ @@ -95,7 +95,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -121,13 +121,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: 
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -280,7 +280,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 12288 Data size: 2983078 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2733998 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -289,7 +289,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprOrExpr(children: FilterDoubleColLessDoubleScalar(col 4:float, val -6432.0), FilterExprAndExpr(children: FilterDoubleColLessEqualDoubleScalar(col 13:double, val 12.503)(children: CastTimestampToDouble(col 8:timestamp) -> 13:double), FilterDoubleColNotEqualDoubleScalar(col 14:double, val 11.998)(children: CastTimestampToDouble(col 9:timestamp) -> 14:double), SelectColumnIsNotNull(col 7:string)), FilterExprAndExpr(children: FilterDoubleColEqualDoubleScalar(col 5:double, val 988888.0), SelectColumnIsNotNull(col 10:boolean))) predicate: ((cfloat < -6432.0) or ((UDFToDouble(ctimestamp1) <= 12.503D) and (UDFToDouble(ctimestamp2) <> 11.998D) and cstring2 is not null) or ((cdouble = 988888.0D) and cboolean1 is not null)) (type: boolean) - Statistics: Num rows: 3059 Data size: 742850 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 680930 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: ctimestamp1 (type: timestamp), cdouble (type: double), cboolean1 (type: boolean), cstring1 (type: string), cfloat (type: float), (- cdouble) (type: double), (-5638.15D - cdouble) (type: double), (cdouble * -257.0D) (type: double), (UDFToFloat(cint) + cfloat) (type: float), ((- cdouble) + UDFToDouble(cbigint)) (type: double), (- cdouble) (type: double), (-1.389 - cfloat) (type: float), (- 
cfloat) (type: float), ((-5638.15D - cdouble) + UDFToDouble((UDFToFloat(cint) + cfloat))) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 @@ -298,7 +298,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [8, 5, 10, 6, 4, 15, 16, 17, 19, 22, 23, 24, 25, 29] selectExpressions: DoubleColUnaryMinus(col 5:double) -> 15:double, DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 16:double, DoubleColMultiplyDoubleScalar(col 5:double, val -257.0) -> 17:double, DoubleColAddDoubleColumn(col 18:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 18:float) -> 19:float, DoubleColAddDoubleColumn(col 20:double, col 21:double)(children: DoubleColUnaryMinus(col 5:double) -> 20:double, CastLongToDouble(col 3:bigint) -> 21:double) -> 22:double, DoubleColUnaryMinus(col 5:double) -> 23:double, DoubleScalarSubtractDoubleColumn(val -1.3890000581741333, col 4:float) -> 24:float, DoubleColUnaryMinus(col 4:float) -> 25:float, DoubleColAddDoubleColumn(col 26:double, col 28:double)(children: DoubleScalarSubtractDoubleColumn(val -5638.15, col 5:double) -> 26:double, DoubleColAddDoubleColumn(col 27:float, col 4:float)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 27:float) -> 28:float) -> 29:double - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: timestamp), _col1 (type: double), _col2 (type: boolean), _col3 (type: string), _col4 (type: float), _col5 (type: double), _col6 (type: double), _col7 (type: double), _col8 (type: float), _col9 (type: double), _col10 (type: double), _col11 (type: float), _col12 (type: float), _col13 (type: double) sort order: ++++++++++++++ @@ -307,7 +307,7 @@ STAGE PLANS: native: false nativeConditionsMet: 
hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE TopN Hash Memory Usage: 0.1 Execution mode: vectorized Map Vectorization: @@ -327,13 +327,13 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: timestamp), KEY.reducesinkkey1 (type: double), KEY.reducesinkkey2 (type: boolean), KEY.reducesinkkey3 (type: string), KEY.reducesinkkey4 (type: float), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey6 (type: double), KEY.reducesinkkey7 (type: double), KEY.reducesinkkey8 (type: float), KEY.reducesinkkey9 (type: double), KEY.reducesinkkey5 (type: double), KEY.reducesinkkey11 (type: float), KEY.reducesinkkey12 (type: float), KEY.reducesinkkey13 (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13 - Statistics: Num rows: 3059 Data size: 557250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3059 Data size: 526290 Basic stats: COMPLETE Column stats: COMPLETE Limit Number of rows: 20 - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 20 Data size: 3760 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 20 Data size: 3600 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git 
a/ql/src/test/results/clientpositive/vectorization_9.q.out b/ql/src/test/results/clientpositive/vectorization_9.q.out index 2dfcc775ad..9c782f230b 100644 --- a/ql/src/test/results/clientpositive/vectorization_9.q.out +++ b/ql/src/test/results/clientpositive/vectorization_9.q.out @@ -57,7 +57,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 12288 Data size: 2308074 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 2183514 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -67,7 +67,7 @@ STAGE PLANS: native: true predicateExpression: FilterExprAndExpr(children: FilterStringColLikeStringScalar(col 7:string, pattern %b%), FilterExprOrExpr(children: FilterDoubleColGreaterEqualDoubleScalar(col 5:double, val -1.389), FilterStringGroupColLessStringScalar(col 6:string, val a))) predicate: ((cstring2 like '%b%') and ((cdouble >= -1.389D) or (cstring1 < 'a'))) (type: boolean) - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: cstring1 (type: string), cdouble (type: double), ctimestamp1 (type: timestamp), (cdouble * cdouble) (type: double) outputColumnNames: _col0, _col1, _col2, _col3 @@ -76,7 +76,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [6, 5, 8, 13] selectExpressions: DoubleColMultiplyDoubleColumn(col 5:double, col 5:double) -> 13:double - Statistics: Num rows: 6144 Data size: 1154088 Basic stats: COMPLETE Column stats: COMPLETE 
+ Statistics: Num rows: 6144 Data size: 1091808 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: count(_col1), sum(_col3), sum(_col1), min(_col1) Group By Vectorization: @@ -91,7 +91,7 @@ STAGE PLANS: minReductionHashAggr: 0.99 mode: hash outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp) sort order: +++ @@ -101,7 +101,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 3072 Data size: 455172 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: double) Execution mode: vectorized Map Vectorization: @@ -129,14 +129,14 @@ STAGE PLANS: keys: KEY._col0 (type: string), KEY._col1 (type: double), KEY._col2 (type: timestamp) mode: mergepartial outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 - Statistics: Num rows: 1536 Data size: 227586 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 424052 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: _col0 (type: string), _col1 (type: double), _col2 (type: timestamp), (_col1 - 9763215.5639D) (type: double), (- (_col1 - 9763215.5639D)) (type: double), _col3 (type: bigint), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) 
END), 0.5) (type: double), (- power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5)) (type: double), (power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) * UDFToDouble(_col3)) (type: double), _col6 (type: double), (9763215.5639D / _col1) (type: double), (CAST( _col3 AS decimal(19,0)) / -1.389) (type: decimal(28,6)), power(((_col4 - ((_col5 * _col5) / _col3)) / CASE WHEN ((_col3 = 1L)) THEN (null) ELSE ((_col3 - 1)) END), 0.5) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12 - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 1536 Data size: 461058 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 3072 Data size: 890996 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorized_casts.q.out b/ql/src/test/results/clientpositive/vectorized_casts.q.out index 4e991dd2a1..13bbb934f7 100644 --- a/ql/src/test/results/clientpositive/vectorized_casts.q.out +++ b/ql/src/test/results/clientpositive/vectorized_casts.q.out @@ -167,7 +167,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true vectorizationSchemaColumns: [0:ctinyint:tinyint, 1:csmallint:smallint, 2:cint:int, 3:cbigint:bigint, 4:cfloat:float, 5:cdouble:double, 
6:cstring1:string, 7:cstring2:string, 8:ctimestamp1:timestamp, 9:ctimestamp2:timestamp, 10:cboolean1:boolean, 11:cboolean2:boolean, 12:ROW__ID:struct] @@ -177,7 +177,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColEqualLongScalar(col 13:bigint, val 0)(children: LongColModuloLongScalar(col 3:bigint, val 250) -> 13:bigint) predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: UDFToBoolean(ctinyint) (type: boolean), UDFToBoolean(csmallint) (type: boolean), UDFToBoolean(cint) (type: boolean), UDFToBoolean(cbigint) (type: boolean), UDFToBoolean(cfloat) (type: boolean), UDFToBoolean(cdouble) (type: boolean), cboolean1 (type: boolean), ((cbigint * 0L) <> 0L) (type: boolean), UDFToBoolean(ctimestamp1) (type: boolean), UDFToBoolean(cstring1) (type: boolean), UDFToInteger(ctinyint) (type: int), UDFToInteger(csmallint) (type: int), cint (type: int), UDFToInteger(cbigint) (type: int), UDFToInteger(cfloat) (type: int), UDFToInteger(cdouble) (type: int), UDFToInteger(cboolean1) (type: int), UDFToInteger(ctimestamp1) (type: int), UDFToInteger(cstring1) (type: int), UDFToInteger(substr(cstring1, 1, 1)) (type: int), UDFToByte(cfloat) (type: tinyint), UDFToShort(cfloat) (type: smallint), UDFToLong(cfloat) (type: bigint), UDFToDouble(ctinyint) (type: double), UDFToDouble(csmallint) (type: double), UDFToDouble(cint) (type: double), UDFToDouble(cbigint) (type: double), UDFToDouble(cfloat) (type: double), cdouble (type: double), UDFToDouble(cboolean1) (type: double), UDFToDouble(ctimestamp1) (type: double), UDFToDouble(cstring1) (type: double), UDFToDouble(substr(cstring1, 1, 1)) (type: double), UDFToFloat(cint) (type: float), UDFToFloat(cdouble) (type: float), CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( 
cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), CAST( CAST( ctimestamp1 AS DATE) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp), CAST( ctinyint AS STRING) (type: string), CAST( csmallint AS STRING) (type: string), CAST( cint AS STRING) (type: string), CAST( cbigint AS STRING) (type: string), CAST( cfloat AS STRING) (type: string), CAST( cdouble AS STRING) (type: string), CAST( cboolean1 AS STRING) (type: string), CAST( (cbigint * 0L) AS STRING) (type: string), CAST( ctimestamp1 AS STRING) (type: string), cstring1 (type: string), CAST( CAST( cstring1 AS CHAR(10)) AS STRING) (type: string), CAST( CAST( cstring1 AS varchar(10)) AS STRING) (type: string), UDFToFloat(UDFToInteger(cfloat)) (type: float), UDFToDouble((cint * 2)) (type: double), CAST( sin(cfloat) AS STRING) (type: string), (UDFToDouble(UDFToFloat(cint)) + UDFToDouble(cboolean1)) (type: double) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23, _col24, _col25, _col26, _col27, _col28, _col29, _col30, _col31, _col32, _col33, _col34, _col35, _col36, _col37, _col38, _col39, _col40, _col41, _col42, _col43, _col44, _col45, _col46, _col47, _col48, _col49, _col50, _col51, _col52, _col53, _col54, _col55, _col56, _col57, _col58, _col59, _col60, _col61, _col62 @@ -186,13 +186,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [14, 15, 16, 17, 18, 19, 10, 21, 22, 23, 0, 1, 2, 3, 24, 25, 10, 26, 27, 29, 30, 31, 32, 33, 34, 35, 36, 4, 5, 37, 38, 39, 41, 42, 5, 44, 46, 48, 50, 51, 52, 54, 58, 60, 8, 61, 63, 64, 65, 
66, 67, 68, 69, 70, 72, 73, 6, 74, 75, 77, 79, 81, 84] selectExpressions: CastLongToBooleanViaLongToLong(col 0:tinyint) -> 14:boolean, CastLongToBooleanViaLongToLong(col 1:smallint) -> 15:boolean, CastLongToBooleanViaLongToLong(col 2:int) -> 16:boolean, CastLongToBooleanViaLongToLong(col 3:bigint) -> 17:boolean, CastDoubleToBooleanViaDoubleToLong(col 4:float) -> 18:boolean, CastDoubleToBooleanViaDoubleToLong(col 5:double) -> 19:boolean, LongColNotEqualLongScalar(col 20:bigint, val 0)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 20:bigint) -> 21:boolean, CastTimestampToBoolean(col 8:timestamp) -> 22:boolean, CastStringToBoolean(col 6) -> 23:boolean, CastDoubleToLong(col 4:float) -> 24:int, CastDoubleToLong(col 5:double) -> 25:int, CastTimestampToLong(col 8:timestamp) -> 26:int, CastStringToLong(col 6:string) -> 27:int, CastStringToLong(col 28:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 28:string) -> 29:int, CastDoubleToLong(col 4:float) -> 30:tinyint, CastDoubleToLong(col 4:float) -> 31:smallint, CastDoubleToLong(col 4:float) -> 32:bigint, CastLongToDouble(col 0:tinyint) -> 33:double, CastLongToDouble(col 1:smallint) -> 34:double, CastLongToDouble(col 2:int) -> 35:double, CastLongToDouble(col 3:bigint) -> 36:double, CastLongToDouble(col 10:boolean) -> 37:double, CastTimestampToDouble(col 8:timestamp) -> 38:double, CastStringToDouble(col 6:string) -> 39:double, CastStringToDouble(col 40:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 40:string) -> 41:double, CastLongToFloatViaLongToDouble(col 2:int) -> 42:float, CastMillisecondsLongToTimestamp(col 0:tinyint) -> 44:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 46:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 48:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 50:timestamp, CastDoubleToTimestamp(col 4:float) -> 51:timestamp, CastDoubleToTimestamp(col 5:double) -> 52:timestamp, 
CastMillisecondsLongToTimestamp(col 10:boolean) -> 54:timestamp, CastMillisecondsLongToTimestamp(col 57:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 57:bigint) -> 58:timestamp, CastDateToTimestamp(col 59:date)(children: CastTimestampToDate(col 8:timestamp) -> 59:date) -> 60:timestamp, CastStringToTimestamp(col 6:string) -> 61:timestamp, CastStringToTimestamp(col 62:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 62:string) -> 63:timestamp, CastLongToString(col 0:tinyint) -> 64:string, CastLongToString(col 1:smallint) -> 65:string, CastLongToString(col 2:int) -> 66:string, CastLongToString(col 3:bigint) -> 67:string, CastFloatToString(col 4:float) -> 68:string, CastDoubleToString(col 5:double) -> 69:string, CastBooleanToStringViaLongToString(col 10:boolean) -> 70:string, CastLongToString(col 71:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 71:bigint) -> 72:string, CastTimestampToString(col 8:timestamp) -> 73:string, CastStringGroupToChar(col 6:string, maxLength 10) -> 74:char(10), CastStringGroupToVarChar(col 6:string, maxLength 10) -> 75:varchar(10), CastLongToFloatViaLongToDouble(col 76:int)(children: CastDoubleToLong(col 4:float) -> 76:int) -> 77:float, CastLongToDouble(col 78:int)(children: LongColMultiplyLongScalar(col 2:int, val 2) -> 78:int) -> 79:double, CastDoubleToString(col 80:double)(children: FuncSinDoubleToDouble(col 4:float) -> 80:double) -> 81:string, DoubleColAddDoubleColumn(col 82:double, col 83:double)(children: CastLongToFloatViaLongToDouble(col 2:int) -> 82:float, CastLongToDouble(col 10:boolean) -> 83:double) -> 84:double - Statistics: Num rows: 6144 Data size: 16362860 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 16014092 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 16362860 Basic 
stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 16014092 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 84c4255ea4..5c1fe268a5 100644 --- a/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ b/ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -257,7 +257,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 4276 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 3956 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -268,7 +268,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 0, 1, 3, 13, 14, 15, 16, 17] selectExpressions: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 6:int, VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 7:int, VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 8:int, VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 9:int, VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 10:int, VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 11:int, VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 12:int, IfExprTimestampColumnScalar(col 0:boolean, col 1:timestamp, val 1319-01-25 08:31:57.778) -> 13:timestamp, IfExprTimestampScalarColumn(col 0:boolean, val 2000-12-18 00:42:30.0005, col 1:timestamp) -> 14:timestamp, IfExprTimestampColumnColumn(col 0:boolean, col 1:timestampcol 3:timestamp) -> 15:timestamp, IfExprColumnNull(col 0:boolean, col 1:timestamp, null)(children: col 
0:boolean, col 1:timestamp) -> 16:timestamp, IfExprNullColumn(col 0:boolean, null, col 3)(children: col 0:boolean, col 3:timestamp) -> 17:timestamp - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE Reduce Output Operator key expressions: _col0 (type: bigint) sort order: + @@ -277,7 +277,7 @@ STAGE PLANS: native: false nativeConditionsMet: hive.vectorized.execution.reducesink.new.enabled IS true, No PTF TopN IS true, No DISTINCT columns IS true, BinarySortableSerDe for keys IS true, LazyBinarySerDe for values IS true nativeConditionsNotMet: hive.execution.engine mr IN [tez, spark] IS false - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE value expressions: _col1 (type: int), _col2 (type: int), _col3 (type: int), _col5 (type: int), _col6 (type: int), _col7 (type: int), _col8 (type: int), _col9 (type: boolean), _col10 (type: timestamp), _col11 (type: timestamp), _col12 (type: timestamp), _col13 (type: timestamp), _col14 (type: timestamp), _col15 (type: timestamp), _col16 (type: timestamp) Execution mode: vectorized Map Vectorization: @@ -297,10 +297,10 @@ STAGE PLANS: Select Operator expressions: KEY.reducesinkkey0 (type: bigint), VALUE._col0 (type: int), VALUE._col1 (type: int), VALUE._col2 (type: int), VALUE._col2 (type: int), VALUE._col3 (type: int), VALUE._col4 (type: int), VALUE._col5 (type: int), VALUE._col6 (type: int), VALUE._col7 (type: boolean), VALUE._col8 (type: timestamp), VALUE._col9 (type: timestamp), VALUE._col10 (type: timestamp), VALUE._col11 (type: timestamp), VALUE._col12 (type: timestamp), VALUE._col13 (type: timestamp), VALUE._col14 (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16 - 
Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false - Statistics: Num rows: 52 Data size: 16756 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 16436 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -635,7 +635,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 7617 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 7497 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -936,7 +936,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -946,7 +946,7 @@ STAGE PLANS: className: VectorSelectOperator native: true projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: min(ctimestamp1), max(ctimestamp1), count(ctimestamp1), count() Group By Vectorization: @@ -1048,7 +1048,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1058,7 +1058,7 @@ STAGE PLANS: className: VectorSelectOperator native: true 
projectedOutputColumnNums: [1] - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(ctimestamp1) Group By Vectorization: @@ -1172,7 +1172,7 @@ STAGE PLANS: Map Operator Tree: TableScan alias: alltypesorc_string - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Select Operator @@ -1183,7 +1183,7 @@ STAGE PLANS: native: true projectedOutputColumnNums: [1, 5, 8] selectExpressions: CastTimestampToDouble(col 1:timestamp) -> 5:double, DoubleColMultiplyDoubleColumn(col 6:double, col 7:double)(children: CastTimestampToDouble(col 1:timestamp) -> 6:double, CastTimestampToDouble(col 1:timestamp) -> 7:double) -> 8:double - Statistics: Num rows: 52 Data size: 2080 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 52 Data size: 1960 Basic stats: COMPLETE Column stats: COMPLETE Group By Operator aggregations: sum(_col0), count(_col0), sum(_col2), sum(_col1) Group By Vectorization: diff --git a/ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out b/ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out index e9dbd881ef..f6057abb4d 100644 --- a/ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out +++ b/ql/src/test/results/clientpositive/vectorized_timestamp_ints_casts.q.out @@ -53,7 +53,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -62,7 +62,7 @@ STAGE PLANS: native: true predicateExpression: 
FilterLongColEqualLongScalar(col 13:bigint, val 0)(children: LongColModuloLongScalar(col 3:bigint, val 250) -> 13:bigint) predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -71,13 +71,13 @@ STAGE PLANS: native: true projectedOutputColumnNums: [15, 17, 19, 21, 22, 23, 25, 29, 8, 30, 32] selectExpressions: CastMillisecondsLongToTimestamp(col 0:tinyint) -> 15:timestamp, CastMillisecondsLongToTimestamp(col 1:smallint) -> 17:timestamp, CastMillisecondsLongToTimestamp(col 2:int) -> 19:timestamp, CastMillisecondsLongToTimestamp(col 3:bigint) -> 21:timestamp, CastDoubleToTimestamp(col 4:float) -> 22:timestamp, CastDoubleToTimestamp(col 5:double) -> 23:timestamp, CastMillisecondsLongToTimestamp(col 10:boolean) -> 25:timestamp, CastMillisecondsLongToTimestamp(col 28:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 28:bigint) -> 29:timestamp, CastStringToTimestamp(col 6:string) -> 30:timestamp, CastStringToTimestamp(col 31:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 31:string) -> 32:timestamp - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num 
rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat @@ -218,7 +218,7 @@ STAGE PLANS: TableScan alias: alltypesorc filterExpr: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 12288 Data size: 1684250 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 12288 Data size: 1559690 Basic stats: COMPLETE Column stats: COMPLETE TableScan Vectorization: native: true Filter Operator @@ -227,7 +227,7 @@ STAGE PLANS: native: true predicateExpression: FilterLongColEqualLongScalar(col 13:bigint, val 0)(children: LongColModuloLongScalar(col 3:bigint, val 250) -> 13:bigint) predicate: ((cbigint % 250L) = 0L) (type: boolean) - Statistics: Num rows: 6144 Data size: 842180 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 779900 Basic stats: COMPLETE Column stats: COMPLETE Select Operator expressions: CAST( ctinyint AS TIMESTAMP) (type: timestamp), CAST( csmallint AS TIMESTAMP) (type: timestamp), CAST( cint AS TIMESTAMP) (type: timestamp), CAST( cbigint AS TIMESTAMP) (type: timestamp), CAST( cfloat AS TIMESTAMP) (type: timestamp), CAST( cdouble AS TIMESTAMP) (type: timestamp), CAST( cboolean1 AS TIMESTAMP) (type: timestamp), CAST( (cbigint * 0L) AS TIMESTAMP) (type: timestamp), ctimestamp1 (type: timestamp), CAST( cstring1 AS TIMESTAMP) (type: timestamp), CAST( substr(cstring1, 1, 1) AS TIMESTAMP) (type: timestamp) outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 @@ -236,13 +236,13 @@ STAGE PLANS: native: true 
projectedOutputColumnNums: [14, 15, 16, 17, 18, 19, 20, 22, 8, 23, 25] selectExpressions: CastLongToTimestamp(col 0:tinyint) -> 14:timestamp, CastLongToTimestamp(col 1:smallint) -> 15:timestamp, CastLongToTimestamp(col 2:int) -> 16:timestamp, CastLongToTimestamp(col 3:bigint) -> 17:timestamp, CastDoubleToTimestamp(col 4:float) -> 18:timestamp, CastDoubleToTimestamp(col 5:double) -> 19:timestamp, CastLongToTimestamp(col 10:boolean) -> 20:timestamp, CastLongToTimestamp(col 21:bigint)(children: LongColMultiplyLongScalar(col 3:bigint, val 0) -> 21:bigint) -> 22:timestamp, CastStringToTimestamp(col 6:string) -> 23:timestamp, CastStringToTimestamp(col 24:string)(children: StringSubstrColStartLen(col 6:string, start 0, length 1) -> 24:string) -> 25:timestamp - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE File Output Operator compressed: false File Sink Vectorization: className: VectorFileSinkOperator native: false - Statistics: Num rows: 6144 Data size: 2703360 Basic stats: COMPLETE Column stats: COMPLETE + Statistics: Num rows: 6144 Data size: 2641080 Basic stats: COMPLETE Column stats: COMPLETE table: input format: org.apache.hadoop.mapred.SequenceFileInputFormat output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java index 9a2e4f4d56..a92a86758d 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ColumnStatisticsData.java @@ -43,6 +43,7 @@ private 
static final org.apache.thrift.protocol.TField BINARY_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("binaryStats", org.apache.thrift.protocol.TType.STRUCT, (short)5); private static final org.apache.thrift.protocol.TField DECIMAL_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("decimalStats", org.apache.thrift.protocol.TType.STRUCT, (short)6); private static final org.apache.thrift.protocol.TField DATE_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("dateStats", org.apache.thrift.protocol.TType.STRUCT, (short)7); + private static final org.apache.thrift.protocol.TField TIMESTAMP_STATS_FIELD_DESC = new org.apache.thrift.protocol.TField("timestampStats", org.apache.thrift.protocol.TType.STRUCT, (short)8); /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ public enum _Fields implements org.apache.thrift.TFieldIdEnum { @@ -52,7 +53,8 @@ STRING_STATS((short)4, "stringStats"), BINARY_STATS((short)5, "binaryStats"), DECIMAL_STATS((short)6, "decimalStats"), - DATE_STATS((short)7, "dateStats"); + DATE_STATS((short)7, "dateStats"), + TIMESTAMP_STATS((short)8, "timestampStats"); private static final Map byName = new HashMap(); @@ -81,6 +83,8 @@ public static _Fields findByThriftId(int fieldId) { return DECIMAL_STATS; case 7: // DATE_STATS return DATE_STATS; + case 8: // TIMESTAMP_STATS + return TIMESTAMP_STATS; default: return null; } @@ -137,6 +141,8 @@ public String getFieldName() { new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, DecimalColumnStatsData.class))); tmpMap.put(_Fields.DATE_STATS, new org.apache.thrift.meta_data.FieldMetaData("dateStats", org.apache.thrift.TFieldRequirementType.DEFAULT, new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, DateColumnStatsData.class))); + tmpMap.put(_Fields.TIMESTAMP_STATS, new org.apache.thrift.meta_data.FieldMetaData("timestampStats", 
org.apache.thrift.TFieldRequirementType.DEFAULT, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, TimestampColumnStatsData.class))); metaDataMap = Collections.unmodifiableMap(tmpMap); org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(ColumnStatisticsData.class, metaDataMap); } @@ -198,6 +204,12 @@ public static ColumnStatisticsData dateStats(DateColumnStatsData value) { return x; } + public static ColumnStatisticsData timestampStats(TimestampColumnStatsData value) { + ColumnStatisticsData x = new ColumnStatisticsData(); + x.setTimestampStats(value); + return x; + } + @Override protected void checkType(_Fields setField, Object value) throws ClassCastException { @@ -237,6 +249,11 @@ protected void checkType(_Fields setField, Object value) throws ClassCastExcepti break; } throw new ClassCastException("Was expecting value of type DateColumnStatsData for field 'dateStats', but got " + value.getClass().getSimpleName()); + case TIMESTAMP_STATS: + if (value instanceof TimestampColumnStatsData) { + break; + } + throw new ClassCastException("Was expecting value of type TimestampColumnStatsData for field 'timestampStats', but got " + value.getClass().getSimpleName()); default: throw new IllegalArgumentException("Unknown field id " + setField); } @@ -317,6 +334,16 @@ protected Object standardSchemeReadValue(org.apache.thrift.protocol.TProtocol ip org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); return null; } + case TIMESTAMP_STATS: + if (field.type == TIMESTAMP_STATS_FIELD_DESC.type) { + TimestampColumnStatsData timestampStats; + timestampStats = new TimestampColumnStatsData(); + timestampStats.read(iprot); + return timestampStats; + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, field.type); + return null; + } default: throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); } @@ -357,6 +384,10 @@ protected void 
standardSchemeWriteValue(org.apache.thrift.protocol.TProtocol opr DateColumnStatsData dateStats = (DateColumnStatsData)value_; dateStats.write(oprot); return; + case TIMESTAMP_STATS: + TimestampColumnStatsData timestampStats = (TimestampColumnStatsData)value_; + timestampStats.write(oprot); + return; default: throw new IllegalStateException("Cannot write union with unknown field " + setField_); } @@ -402,6 +433,11 @@ protected Object tupleSchemeReadValue(org.apache.thrift.protocol.TProtocol iprot dateStats = new DateColumnStatsData(); dateStats.read(iprot); return dateStats; + case TIMESTAMP_STATS: + TimestampColumnStatsData timestampStats; + timestampStats = new TimestampColumnStatsData(); + timestampStats.read(iprot); + return timestampStats; default: throw new IllegalStateException("setField wasn't null, but didn't match any of the case statements!"); } @@ -441,6 +477,10 @@ protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) DateColumnStatsData dateStats = (DateColumnStatsData)value_; dateStats.write(oprot); return; + case TIMESTAMP_STATS: + TimestampColumnStatsData timestampStats = (TimestampColumnStatsData)value_; + timestampStats.write(oprot); + return; default: throw new IllegalStateException("Cannot write union with unknown field " + setField_); } @@ -463,6 +503,8 @@ protected void tupleSchemeWriteValue(org.apache.thrift.protocol.TProtocol oprot) return DECIMAL_STATS_FIELD_DESC; case DATE_STATS: return DATE_STATS_FIELD_DESC; + case TIMESTAMP_STATS: + return TIMESTAMP_STATS_FIELD_DESC; default: throw new IllegalArgumentException("Unknown field id " + setField); } @@ -581,6 +623,20 @@ public void setDateStats(DateColumnStatsData value) { value_ = value; } + public TimestampColumnStatsData getTimestampStats() { + if (getSetField() == _Fields.TIMESTAMP_STATS) { + return (TimestampColumnStatsData)getFieldValue(); + } else { + throw new RuntimeException("Cannot get field 'timestampStats' because union is currently set to " + 
getFieldDesc(getSetField()).name); + } + } + + public void setTimestampStats(TimestampColumnStatsData value) { + if (value == null) throw new NullPointerException(); + setField_ = _Fields.TIMESTAMP_STATS; + value_ = value; + } + public boolean isSetBooleanStats() { return setField_ == _Fields.BOOLEAN_STATS; } @@ -616,6 +672,11 @@ public boolean isSetDateStats() { } + public boolean isSetTimestampStats() { + return setField_ == _Fields.TIMESTAMP_STATS; + } + + public boolean equals(Object other) { if (other instanceof ColumnStatisticsData) { return equals((ColumnStatisticsData)other); diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Timestamp.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Timestamp.java new file mode 100644 index 0000000000..b43eb1ca1b --- /dev/null +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/Timestamp.java @@ -0,0 +1,387 @@ +/** + * Autogenerated by Thrift Compiler (0.9.3) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +package org.apache.hadoop.hive.metastore.api; + +import org.apache.thrift.scheme.IScheme; +import org.apache.thrift.scheme.SchemeFactory; +import org.apache.thrift.scheme.StandardScheme; + +import org.apache.thrift.scheme.TupleScheme; +import org.apache.thrift.protocol.TTupleProtocol; +import org.apache.thrift.protocol.TProtocolException; +import org.apache.thrift.EncodingUtils; +import org.apache.thrift.TException; +import org.apache.thrift.async.AsyncMethodCallback; +import org.apache.thrift.server.AbstractNonblockingServer.*; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; +import java.util.HashMap; +import java.util.EnumMap; +import java.util.Set; +import java.util.HashSet; +import java.util.EnumSet; +import java.util.Collections; +import java.util.BitSet; +import 
java.nio.ByteBuffer; +import java.util.Arrays; +import javax.annotation.Generated; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) +@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") +@org.apache.hadoop.classification.InterfaceAudience.Public @org.apache.hadoop.classification.InterfaceStability.Stable public class Timestamp implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("Timestamp"); + + private static final org.apache.thrift.protocol.TField SECONDS_SINCE_EPOCH_FIELD_DESC = new org.apache.thrift.protocol.TField("secondsSinceEpoch", org.apache.thrift.protocol.TType.I64, (short)1); + + private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new TimestampStandardSchemeFactory()); + schemes.put(TupleScheme.class, new TimestampTupleSchemeFactory()); + } + + private long secondsSinceEpoch; // required + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + SECONDS_SINCE_EPOCH((short)1, "secondsSinceEpoch"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + case 1: // SECONDS_SINCE_EPOCH + return SECONDS_SINCE_EPOCH; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. 
+ */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + private static final int __SECONDSSINCEEPOCH_ISSET_ID = 0; + private byte __isset_bitfield = 0; + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.SECONDS_SINCE_EPOCH, new org.apache.thrift.meta_data.FieldMetaData("secondsSinceEpoch", org.apache.thrift.TFieldRequirementType.REQUIRED, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(Timestamp.class, metaDataMap); + } + + public Timestamp() { + } + + public Timestamp( + long secondsSinceEpoch) + { + this(); + this.secondsSinceEpoch = secondsSinceEpoch; + setSecondsSinceEpochIsSet(true); + } + + /** + * Performs a deep copy on other. 
+ */ + public Timestamp(Timestamp other) { + __isset_bitfield = other.__isset_bitfield; + this.secondsSinceEpoch = other.secondsSinceEpoch; + } + + public Timestamp deepCopy() { + return new Timestamp(this); + } + + @Override + public void clear() { + setSecondsSinceEpochIsSet(false); + this.secondsSinceEpoch = 0; + } + + public long getSecondsSinceEpoch() { + return this.secondsSinceEpoch; + } + + public void setSecondsSinceEpoch(long secondsSinceEpoch) { + this.secondsSinceEpoch = secondsSinceEpoch; + setSecondsSinceEpochIsSet(true); + } + + public void unsetSecondsSinceEpoch() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __SECONDSSINCEEPOCH_ISSET_ID); + } + + /** Returns true if field secondsSinceEpoch is set (has been assigned a value) and false otherwise */ + public boolean isSetSecondsSinceEpoch() { + return EncodingUtils.testBit(__isset_bitfield, __SECONDSSINCEEPOCH_ISSET_ID); + } + + public void setSecondsSinceEpochIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __SECONDSSINCEEPOCH_ISSET_ID, value); + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case SECONDS_SINCE_EPOCH: + if (value == null) { + unsetSecondsSinceEpoch(); + } else { + setSecondsSinceEpoch((Long)value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case SECONDS_SINCE_EPOCH: + return getSecondsSinceEpoch(); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case SECONDS_SINCE_EPOCH: + return isSetSecondsSinceEpoch(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof Timestamp) + return this.equals((Timestamp)that); + 
return false; + } + + public boolean equals(Timestamp that) { + if (that == null) + return false; + + boolean this_present_secondsSinceEpoch = true; + boolean that_present_secondsSinceEpoch = true; + if (this_present_secondsSinceEpoch || that_present_secondsSinceEpoch) { + if (!(this_present_secondsSinceEpoch && that_present_secondsSinceEpoch)) + return false; + if (this.secondsSinceEpoch != that.secondsSinceEpoch) + return false; + } + + return true; + } + + @Override + public int hashCode() { + List list = new ArrayList(); + + boolean present_secondsSinceEpoch = true; + list.add(present_secondsSinceEpoch); + if (present_secondsSinceEpoch) + list.add(secondsSinceEpoch); + + return list.hashCode(); + } + + @Override + public int compareTo(Timestamp other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + + lastComparison = Boolean.valueOf(isSetSecondsSinceEpoch()).compareTo(other.isSetSecondsSinceEpoch()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetSecondsSinceEpoch()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.secondsSinceEpoch, other.secondsSinceEpoch); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("Timestamp("); + boolean first = true; + + sb.append("secondsSinceEpoch:"); + sb.append(this.secondsSinceEpoch); + first = false; + sb.append(")"); + return sb.toString(); + } + + 
public void validate() throws org.apache.thrift.TException { + // check for required fields + if (!isSetSecondsSinceEpoch()) { + throw new org.apache.thrift.protocol.TProtocolException("Required field 'secondsSinceEpoch' is unset! Struct:" + toString()); + } + + // check for sub-struct validity + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. + __isset_bitfield = 0; + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class TimestampStandardSchemeFactory implements SchemeFactory { + public TimestampStandardScheme getScheme() { + return new TimestampStandardScheme(); + } + } + + private static class TimestampStandardScheme extends StandardScheme { + + public void read(org.apache.thrift.protocol.TProtocol iprot, Timestamp struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + case 1: // SECONDS_SINCE_EPOCH + if (schemeField.type == org.apache.thrift.protocol.TType.I64) { + struct.secondsSinceEpoch = iprot.readI64(); + struct.setSecondsSinceEpochIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + default: + 
org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, Timestamp struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + oprot.writeFieldBegin(SECONDS_SINCE_EPOCH_FIELD_DESC); + oprot.writeI64(struct.secondsSinceEpoch); + oprot.writeFieldEnd(); + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class TimestampTupleSchemeFactory implements SchemeFactory { + public TimestampTupleScheme getScheme() { + return new TimestampTupleScheme(); + } + } + + private static class TimestampTupleScheme extends TupleScheme { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, Timestamp struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + oprot.writeI64(struct.secondsSinceEpoch); + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, Timestamp struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + struct.secondsSinceEpoch = iprot.readI64(); + struct.setSecondsSinceEpochIsSet(true); + } + } + +} + diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TimestampColumnStatsData.java b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TimestampColumnStatsData.java new file mode 100644 index 0000000000..3e250f5ff9 --- /dev/null +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TimestampColumnStatsData.java @@ -0,0 +1,823 @@ +/** + * Autogenerated by Thrift Compiler (0.9.3) + * + * DO NOT EDIT UNLESS YOU ARE SURE THAT YOU KNOW WHAT YOU ARE DOING + * @generated + */ +package org.apache.hadoop.hive.metastore.api; + +import 
org.apache.thrift.scheme.IScheme; +import org.apache.thrift.scheme.SchemeFactory; +import org.apache.thrift.scheme.StandardScheme; + +import org.apache.thrift.scheme.TupleScheme; +import org.apache.thrift.protocol.TTupleProtocol; +import org.apache.thrift.protocol.TProtocolException; +import org.apache.thrift.EncodingUtils; +import org.apache.thrift.TException; +import org.apache.thrift.async.AsyncMethodCallback; +import org.apache.thrift.server.AbstractNonblockingServer.*; +import java.util.List; +import java.util.ArrayList; +import java.util.Map; +import java.util.HashMap; +import java.util.EnumMap; +import java.util.Set; +import java.util.HashSet; +import java.util.EnumSet; +import java.util.Collections; +import java.util.BitSet; +import java.nio.ByteBuffer; +import java.util.Arrays; +import javax.annotation.Generated; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +@SuppressWarnings({"cast", "rawtypes", "serial", "unchecked"}) +@Generated(value = "Autogenerated by Thrift Compiler (0.9.3)") +@org.apache.hadoop.classification.InterfaceAudience.Public @org.apache.hadoop.classification.InterfaceStability.Stable public class TimestampColumnStatsData implements org.apache.thrift.TBase, java.io.Serializable, Cloneable, Comparable { + private static final org.apache.thrift.protocol.TStruct STRUCT_DESC = new org.apache.thrift.protocol.TStruct("TimestampColumnStatsData"); + + private static final org.apache.thrift.protocol.TField LOW_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("lowValue", org.apache.thrift.protocol.TType.STRUCT, (short)1); + private static final org.apache.thrift.protocol.TField HIGH_VALUE_FIELD_DESC = new org.apache.thrift.protocol.TField("highValue", org.apache.thrift.protocol.TType.STRUCT, (short)2); + private static final org.apache.thrift.protocol.TField NUM_NULLS_FIELD_DESC = new org.apache.thrift.protocol.TField("numNulls", org.apache.thrift.protocol.TType.I64, (short)3); + private static final 
org.apache.thrift.protocol.TField NUM_DVS_FIELD_DESC = new org.apache.thrift.protocol.TField("numDVs", org.apache.thrift.protocol.TType.I64, (short)4); + private static final org.apache.thrift.protocol.TField BIT_VECTORS_FIELD_DESC = new org.apache.thrift.protocol.TField("bitVectors", org.apache.thrift.protocol.TType.STRING, (short)5); + + private static final Map, SchemeFactory> schemes = new HashMap, SchemeFactory>(); + static { + schemes.put(StandardScheme.class, new TimestampColumnStatsDataStandardSchemeFactory()); + schemes.put(TupleScheme.class, new TimestampColumnStatsDataTupleSchemeFactory()); + } + + private Timestamp lowValue; // optional + private Timestamp highValue; // optional + private long numNulls; // required + private long numDVs; // required + private ByteBuffer bitVectors; // optional + + /** The set of fields this struct contains, along with convenience methods for finding and manipulating them. */ + public enum _Fields implements org.apache.thrift.TFieldIdEnum { + LOW_VALUE((short)1, "lowValue"), + HIGH_VALUE((short)2, "highValue"), + NUM_NULLS((short)3, "numNulls"), + NUM_DVS((short)4, "numDVs"), + BIT_VECTORS((short)5, "bitVectors"); + + private static final Map byName = new HashMap(); + + static { + for (_Fields field : EnumSet.allOf(_Fields.class)) { + byName.put(field.getFieldName(), field); + } + } + + /** + * Find the _Fields constant that matches fieldId, or null if its not found. + */ + public static _Fields findByThriftId(int fieldId) { + switch(fieldId) { + case 1: // LOW_VALUE + return LOW_VALUE; + case 2: // HIGH_VALUE + return HIGH_VALUE; + case 3: // NUM_NULLS + return NUM_NULLS; + case 4: // NUM_DVS + return NUM_DVS; + case 5: // BIT_VECTORS + return BIT_VECTORS; + default: + return null; + } + } + + /** + * Find the _Fields constant that matches fieldId, throwing an exception + * if it is not found. 
+ */ + public static _Fields findByThriftIdOrThrow(int fieldId) { + _Fields fields = findByThriftId(fieldId); + if (fields == null) throw new IllegalArgumentException("Field " + fieldId + " doesn't exist!"); + return fields; + } + + /** + * Find the _Fields constant that matches name, or null if its not found. + */ + public static _Fields findByName(String name) { + return byName.get(name); + } + + private final short _thriftId; + private final String _fieldName; + + _Fields(short thriftId, String fieldName) { + _thriftId = thriftId; + _fieldName = fieldName; + } + + public short getThriftFieldId() { + return _thriftId; + } + + public String getFieldName() { + return _fieldName; + } + } + + // isset id assignments + private static final int __NUMNULLS_ISSET_ID = 0; + private static final int __NUMDVS_ISSET_ID = 1; + private byte __isset_bitfield = 0; + private static final _Fields optionals[] = {_Fields.LOW_VALUE,_Fields.HIGH_VALUE,_Fields.BIT_VECTORS}; + public static final Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> metaDataMap; + static { + Map<_Fields, org.apache.thrift.meta_data.FieldMetaData> tmpMap = new EnumMap<_Fields, org.apache.thrift.meta_data.FieldMetaData>(_Fields.class); + tmpMap.put(_Fields.LOW_VALUE, new org.apache.thrift.meta_data.FieldMetaData("lowValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Timestamp.class))); + tmpMap.put(_Fields.HIGH_VALUE, new org.apache.thrift.meta_data.FieldMetaData("highValue", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.StructMetaData(org.apache.thrift.protocol.TType.STRUCT, Timestamp.class))); + tmpMap.put(_Fields.NUM_NULLS, new org.apache.thrift.meta_data.FieldMetaData("numNulls", org.apache.thrift.TFieldRequirementType.REQUIRED, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.NUM_DVS, new 
org.apache.thrift.meta_data.FieldMetaData("numDVs", org.apache.thrift.TFieldRequirementType.REQUIRED, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.I64))); + tmpMap.put(_Fields.BIT_VECTORS, new org.apache.thrift.meta_data.FieldMetaData("bitVectors", org.apache.thrift.TFieldRequirementType.OPTIONAL, + new org.apache.thrift.meta_data.FieldValueMetaData(org.apache.thrift.protocol.TType.STRING , true))); + metaDataMap = Collections.unmodifiableMap(tmpMap); + org.apache.thrift.meta_data.FieldMetaData.addStructMetaDataMap(TimestampColumnStatsData.class, metaDataMap); + } + + public TimestampColumnStatsData() { + } + + public TimestampColumnStatsData( + long numNulls, + long numDVs) + { + this(); + this.numNulls = numNulls; + setNumNullsIsSet(true); + this.numDVs = numDVs; + setNumDVsIsSet(true); + } + + /** + * Performs a deep copy on other. + */ + public TimestampColumnStatsData(TimestampColumnStatsData other) { + __isset_bitfield = other.__isset_bitfield; + if (other.isSetLowValue()) { + this.lowValue = new Timestamp(other.lowValue); + } + if (other.isSetHighValue()) { + this.highValue = new Timestamp(other.highValue); + } + this.numNulls = other.numNulls; + this.numDVs = other.numDVs; + if (other.isSetBitVectors()) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(other.bitVectors); + } + } + + public TimestampColumnStatsData deepCopy() { + return new TimestampColumnStatsData(this); + } + + @Override + public void clear() { + this.lowValue = null; + this.highValue = null; + setNumNullsIsSet(false); + this.numNulls = 0; + setNumDVsIsSet(false); + this.numDVs = 0; + this.bitVectors = null; + } + + public Timestamp getLowValue() { + return this.lowValue; + } + + public void setLowValue(Timestamp lowValue) { + this.lowValue = lowValue; + } + + public void unsetLowValue() { + this.lowValue = null; + } + + /** Returns true if field lowValue is set (has been assigned a value) and false otherwise */ + public boolean 
isSetLowValue() { + return this.lowValue != null; + } + + public void setLowValueIsSet(boolean value) { + if (!value) { + this.lowValue = null; + } + } + + public Timestamp getHighValue() { + return this.highValue; + } + + public void setHighValue(Timestamp highValue) { + this.highValue = highValue; + } + + public void unsetHighValue() { + this.highValue = null; + } + + /** Returns true if field highValue is set (has been assigned a value) and false otherwise */ + public boolean isSetHighValue() { + return this.highValue != null; + } + + public void setHighValueIsSet(boolean value) { + if (!value) { + this.highValue = null; + } + } + + public long getNumNulls() { + return this.numNulls; + } + + public void setNumNulls(long numNulls) { + this.numNulls = numNulls; + setNumNullsIsSet(true); + } + + public void unsetNumNulls() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMNULLS_ISSET_ID); + } + + /** Returns true if field numNulls is set (has been assigned a value) and false otherwise */ + public boolean isSetNumNulls() { + return EncodingUtils.testBit(__isset_bitfield, __NUMNULLS_ISSET_ID); + } + + public void setNumNullsIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMNULLS_ISSET_ID, value); + } + + public long getNumDVs() { + return this.numDVs; + } + + public void setNumDVs(long numDVs) { + this.numDVs = numDVs; + setNumDVsIsSet(true); + } + + public void unsetNumDVs() { + __isset_bitfield = EncodingUtils.clearBit(__isset_bitfield, __NUMDVS_ISSET_ID); + } + + /** Returns true if field numDVs is set (has been assigned a value) and false otherwise */ + public boolean isSetNumDVs() { + return EncodingUtils.testBit(__isset_bitfield, __NUMDVS_ISSET_ID); + } + + public void setNumDVsIsSet(boolean value) { + __isset_bitfield = EncodingUtils.setBit(__isset_bitfield, __NUMDVS_ISSET_ID, value); + } + + public byte[] getBitVectors() { + setBitVectors(org.apache.thrift.TBaseHelper.rightSize(bitVectors)); + return 
bitVectors == null ? null : bitVectors.array(); + } + + public ByteBuffer bufferForBitVectors() { + return org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void setBitVectors(byte[] bitVectors) { + this.bitVectors = bitVectors == null ? (ByteBuffer)null : ByteBuffer.wrap(Arrays.copyOf(bitVectors, bitVectors.length)); + } + + public void setBitVectors(ByteBuffer bitVectors) { + this.bitVectors = org.apache.thrift.TBaseHelper.copyBinary(bitVectors); + } + + public void unsetBitVectors() { + this.bitVectors = null; + } + + /** Returns true if field bitVectors is set (has been assigned a value) and false otherwise */ + public boolean isSetBitVectors() { + return this.bitVectors != null; + } + + public void setBitVectorsIsSet(boolean value) { + if (!value) { + this.bitVectors = null; + } + } + + public void setFieldValue(_Fields field, Object value) { + switch (field) { + case LOW_VALUE: + if (value == null) { + unsetLowValue(); + } else { + setLowValue((Timestamp)value); + } + break; + + case HIGH_VALUE: + if (value == null) { + unsetHighValue(); + } else { + setHighValue((Timestamp)value); + } + break; + + case NUM_NULLS: + if (value == null) { + unsetNumNulls(); + } else { + setNumNulls((Long)value); + } + break; + + case NUM_DVS: + if (value == null) { + unsetNumDVs(); + } else { + setNumDVs((Long)value); + } + break; + + case BIT_VECTORS: + if (value == null) { + unsetBitVectors(); + } else { + setBitVectors((ByteBuffer)value); + } + break; + + } + } + + public Object getFieldValue(_Fields field) { + switch (field) { + case LOW_VALUE: + return getLowValue(); + + case HIGH_VALUE: + return getHighValue(); + + case NUM_NULLS: + return getNumNulls(); + + case NUM_DVS: + return getNumDVs(); + + case BIT_VECTORS: + return getBitVectors(); + + } + throw new IllegalStateException(); + } + + /** Returns true if field corresponding to fieldID is set (has been assigned a value) and false otherwise */ + public boolean isSet(_Fields field) { + if (field == 
null) { + throw new IllegalArgumentException(); + } + + switch (field) { + case LOW_VALUE: + return isSetLowValue(); + case HIGH_VALUE: + return isSetHighValue(); + case NUM_NULLS: + return isSetNumNulls(); + case NUM_DVS: + return isSetNumDVs(); + case BIT_VECTORS: + return isSetBitVectors(); + } + throw new IllegalStateException(); + } + + @Override + public boolean equals(Object that) { + if (that == null) + return false; + if (that instanceof TimestampColumnStatsData) + return this.equals((TimestampColumnStatsData)that); + return false; + } + + public boolean equals(TimestampColumnStatsData that) { + if (that == null) + return false; + + boolean this_present_lowValue = true && this.isSetLowValue(); + boolean that_present_lowValue = true && that.isSetLowValue(); + if (this_present_lowValue || that_present_lowValue) { + if (!(this_present_lowValue && that_present_lowValue)) + return false; + if (!this.lowValue.equals(that.lowValue)) + return false; + } + + boolean this_present_highValue = true && this.isSetHighValue(); + boolean that_present_highValue = true && that.isSetHighValue(); + if (this_present_highValue || that_present_highValue) { + if (!(this_present_highValue && that_present_highValue)) + return false; + if (!this.highValue.equals(that.highValue)) + return false; + } + + boolean this_present_numNulls = true; + boolean that_present_numNulls = true; + if (this_present_numNulls || that_present_numNulls) { + if (!(this_present_numNulls && that_present_numNulls)) + return false; + if (this.numNulls != that.numNulls) + return false; + } + + boolean this_present_numDVs = true; + boolean that_present_numDVs = true; + if (this_present_numDVs || that_present_numDVs) { + if (!(this_present_numDVs && that_present_numDVs)) + return false; + if (this.numDVs != that.numDVs) + return false; + } + + boolean this_present_bitVectors = true && this.isSetBitVectors(); + boolean that_present_bitVectors = true && that.isSetBitVectors(); + if (this_present_bitVectors || 
that_present_bitVectors) { + if (!(this_present_bitVectors && that_present_bitVectors)) + return false; + if (!this.bitVectors.equals(that.bitVectors)) + return false; + } + + return true; + } + + @Override + public int hashCode() { + List list = new ArrayList(); + + boolean present_lowValue = true && (isSetLowValue()); + list.add(present_lowValue); + if (present_lowValue) + list.add(lowValue); + + boolean present_highValue = true && (isSetHighValue()); + list.add(present_highValue); + if (present_highValue) + list.add(highValue); + + boolean present_numNulls = true; + list.add(present_numNulls); + if (present_numNulls) + list.add(numNulls); + + boolean present_numDVs = true; + list.add(present_numDVs); + if (present_numDVs) + list.add(numDVs); + + boolean present_bitVectors = true && (isSetBitVectors()); + list.add(present_bitVectors); + if (present_bitVectors) + list.add(bitVectors); + + return list.hashCode(); + } + + @Override + public int compareTo(TimestampColumnStatsData other) { + if (!getClass().equals(other.getClass())) { + return getClass().getName().compareTo(other.getClass().getName()); + } + + int lastComparison = 0; + + lastComparison = Boolean.valueOf(isSetLowValue()).compareTo(other.isSetLowValue()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetLowValue()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.lowValue, other.lowValue); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetHighValue()).compareTo(other.isSetHighValue()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetHighValue()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.highValue, other.highValue); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetNumNulls()).compareTo(other.isSetNumNulls()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumNulls()) { + lastComparison = 
org.apache.thrift.TBaseHelper.compareTo(this.numNulls, other.numNulls); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetNumDVs()).compareTo(other.isSetNumDVs()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetNumDVs()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.numDVs, other.numDVs); + if (lastComparison != 0) { + return lastComparison; + } + } + lastComparison = Boolean.valueOf(isSetBitVectors()).compareTo(other.isSetBitVectors()); + if (lastComparison != 0) { + return lastComparison; + } + if (isSetBitVectors()) { + lastComparison = org.apache.thrift.TBaseHelper.compareTo(this.bitVectors, other.bitVectors); + if (lastComparison != 0) { + return lastComparison; + } + } + return 0; + } + + public _Fields fieldForId(int fieldId) { + return _Fields.findByThriftId(fieldId); + } + + public void read(org.apache.thrift.protocol.TProtocol iprot) throws org.apache.thrift.TException { + schemes.get(iprot.getScheme()).getScheme().read(iprot, this); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot) throws org.apache.thrift.TException { + schemes.get(oprot.getScheme()).getScheme().write(oprot, this); + } + + @Override + public String toString() { + StringBuilder sb = new StringBuilder("TimestampColumnStatsData("); + boolean first = true; + + if (isSetLowValue()) { + sb.append("lowValue:"); + if (this.lowValue == null) { + sb.append("null"); + } else { + sb.append(this.lowValue); + } + first = false; + } + if (isSetHighValue()) { + if (!first) sb.append(", "); + sb.append("highValue:"); + if (this.highValue == null) { + sb.append("null"); + } else { + sb.append(this.highValue); + } + first = false; + } + if (!first) sb.append(", "); + sb.append("numNulls:"); + sb.append(this.numNulls); + first = false; + if (!first) sb.append(", "); + sb.append("numDVs:"); + sb.append(this.numDVs); + first = false; + if (isSetBitVectors()) { + if (!first) sb.append(", "); + 
sb.append("bitVectors:"); + if (this.bitVectors == null) { + sb.append("null"); + } else { + org.apache.thrift.TBaseHelper.toString(this.bitVectors, sb); + } + first = false; + } + sb.append(")"); + return sb.toString(); + } + + public void validate() throws org.apache.thrift.TException { + // check for required fields + if (!isSetNumNulls()) { + throw new org.apache.thrift.protocol.TProtocolException("Required field 'numNulls' is unset! Struct:" + toString()); + } + + if (!isSetNumDVs()) { + throw new org.apache.thrift.protocol.TProtocolException("Required field 'numDVs' is unset! Struct:" + toString()); + } + + // check for sub-struct validity + if (lowValue != null) { + lowValue.validate(); + } + if (highValue != null) { + highValue.validate(); + } + } + + private void writeObject(java.io.ObjectOutputStream out) throws java.io.IOException { + try { + write(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(out))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private void readObject(java.io.ObjectInputStream in) throws java.io.IOException, ClassNotFoundException { + try { + // it doesn't seem like you should have to do this, but java serialization is wacky, and doesn't call the default constructor. 
+ __isset_bitfield = 0; + read(new org.apache.thrift.protocol.TCompactProtocol(new org.apache.thrift.transport.TIOStreamTransport(in))); + } catch (org.apache.thrift.TException te) { + throw new java.io.IOException(te); + } + } + + private static class TimestampColumnStatsDataStandardSchemeFactory implements SchemeFactory { + public TimestampColumnStatsDataStandardScheme getScheme() { + return new TimestampColumnStatsDataStandardScheme(); + } + } + + private static class TimestampColumnStatsDataStandardScheme extends StandardScheme { + + public void read(org.apache.thrift.protocol.TProtocol iprot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + org.apache.thrift.protocol.TField schemeField; + iprot.readStructBegin(); + while (true) + { + schemeField = iprot.readFieldBegin(); + if (schemeField.type == org.apache.thrift.protocol.TType.STOP) { + break; + } + switch (schemeField.id) { + case 1: // LOW_VALUE + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.lowValue = new Timestamp(); + struct.lowValue.read(iprot); + struct.setLowValueIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 2: // HIGH_VALUE + if (schemeField.type == org.apache.thrift.protocol.TType.STRUCT) { + struct.highValue = new Timestamp(); + struct.highValue.read(iprot); + struct.setHighValueIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 3: // NUM_NULLS + if (schemeField.type == org.apache.thrift.protocol.TType.I64) { + struct.numNulls = iprot.readI64(); + struct.setNumNullsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + case 4: // NUM_DVS + if (schemeField.type == org.apache.thrift.protocol.TType.I64) { + struct.numDVs = iprot.readI64(); + struct.setNumDVsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); 
+ } + break; + case 5: // BIT_VECTORS + if (schemeField.type == org.apache.thrift.protocol.TType.STRING) { + struct.bitVectors = iprot.readBinary(); + struct.setBitVectorsIsSet(true); + } else { + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + break; + default: + org.apache.thrift.protocol.TProtocolUtil.skip(iprot, schemeField.type); + } + iprot.readFieldEnd(); + } + iprot.readStructEnd(); + struct.validate(); + } + + public void write(org.apache.thrift.protocol.TProtocol oprot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + struct.validate(); + + oprot.writeStructBegin(STRUCT_DESC); + if (struct.lowValue != null) { + if (struct.isSetLowValue()) { + oprot.writeFieldBegin(LOW_VALUE_FIELD_DESC); + struct.lowValue.write(oprot); + oprot.writeFieldEnd(); + } + } + if (struct.highValue != null) { + if (struct.isSetHighValue()) { + oprot.writeFieldBegin(HIGH_VALUE_FIELD_DESC); + struct.highValue.write(oprot); + oprot.writeFieldEnd(); + } + } + oprot.writeFieldBegin(NUM_NULLS_FIELD_DESC); + oprot.writeI64(struct.numNulls); + oprot.writeFieldEnd(); + oprot.writeFieldBegin(NUM_DVS_FIELD_DESC); + oprot.writeI64(struct.numDVs); + oprot.writeFieldEnd(); + if (struct.bitVectors != null) { + if (struct.isSetBitVectors()) { + oprot.writeFieldBegin(BIT_VECTORS_FIELD_DESC); + oprot.writeBinary(struct.bitVectors); + oprot.writeFieldEnd(); + } + } + oprot.writeFieldStop(); + oprot.writeStructEnd(); + } + + } + + private static class TimestampColumnStatsDataTupleSchemeFactory implements SchemeFactory { + public TimestampColumnStatsDataTupleScheme getScheme() { + return new TimestampColumnStatsDataTupleScheme(); + } + } + + private static class TimestampColumnStatsDataTupleScheme extends TupleScheme { + + @Override + public void write(org.apache.thrift.protocol.TProtocol prot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + TTupleProtocol oprot = (TTupleProtocol) prot; + oprot.writeI64(struct.numNulls); + 
oprot.writeI64(struct.numDVs); + BitSet optionals = new BitSet(); + if (struct.isSetLowValue()) { + optionals.set(0); + } + if (struct.isSetHighValue()) { + optionals.set(1); + } + if (struct.isSetBitVectors()) { + optionals.set(2); + } + oprot.writeBitSet(optionals, 3); + if (struct.isSetLowValue()) { + struct.lowValue.write(oprot); + } + if (struct.isSetHighValue()) { + struct.highValue.write(oprot); + } + if (struct.isSetBitVectors()) { + oprot.writeBinary(struct.bitVectors); + } + } + + @Override + public void read(org.apache.thrift.protocol.TProtocol prot, TimestampColumnStatsData struct) throws org.apache.thrift.TException { + TTupleProtocol iprot = (TTupleProtocol) prot; + struct.numNulls = iprot.readI64(); + struct.setNumNullsIsSet(true); + struct.numDVs = iprot.readI64(); + struct.setNumDVsIsSet(true); + BitSet incoming = iprot.readBitSet(3); + if (incoming.get(0)) { + struct.lowValue = new Timestamp(); + struct.lowValue.read(iprot); + struct.setLowValueIsSet(true); + } + if (incoming.get(1)) { + struct.highValue = new Timestamp(); + struct.highValue.read(iprot); + struct.setHighValueIsSet(true); + } + if (incoming.get(2)) { + struct.bitVectors = iprot.readBinary(); + struct.setBitVectorsIsSet(true); + } + } + } + +} + diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php index 03aa9bdb93..c86912b813 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-php/metastore/Types.php @@ -9856,6 +9856,258 @@ class DateColumnStatsData { } +class Timestamp { + static $_TSPEC; + + /** + * @var int + */ + public $secondsSinceEpoch = null; + + public function __construct($vals=null) { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + 1 => array( + 'var' => 'secondsSinceEpoch', + 'type' => TType::I64, + ), + ); + } + if (is_array($vals)) { 
+ if (isset($vals['secondsSinceEpoch'])) { + $this->secondsSinceEpoch = $vals['secondsSinceEpoch']; + } + } + } + + public function getName() { + return 'Timestamp'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + case 1: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->secondsSinceEpoch); + } else { + $xfer += $input->skip($ftype); + } + break; + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('Timestamp'); + if ($this->secondsSinceEpoch !== null) { + $xfer += $output->writeFieldBegin('secondsSinceEpoch', TType::I64, 1); + $xfer += $output->writeI64($this->secondsSinceEpoch); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + +class TimestampColumnStatsData { + static $_TSPEC; + + /** + * @var \metastore\Timestamp + */ + public $lowValue = null; + /** + * @var \metastore\Timestamp + */ + public $highValue = null; + /** + * @var int + */ + public $numNulls = null; + /** + * @var int + */ + public $numDVs = null; + /** + * @var string + */ + public $bitVectors = null; + + public function __construct($vals=null) { + if (!isset(self::$_TSPEC)) { + self::$_TSPEC = array( + 1 => array( + 'var' => 'lowValue', + 'type' => TType::STRUCT, + 'class' => '\metastore\Timestamp', + ), + 2 => array( + 'var' => 'highValue', + 'type' => TType::STRUCT, + 'class' => '\metastore\Timestamp', + ), + 3 => array( + 'var' => 'numNulls', + 'type' => TType::I64, + ), + 4 => array( + 'var' => 'numDVs', + 'type' => TType::I64, + ), + 5 => array( + 'var' => 'bitVectors', + 
'type' => TType::STRING, + ), + ); + } + if (is_array($vals)) { + if (isset($vals['lowValue'])) { + $this->lowValue = $vals['lowValue']; + } + if (isset($vals['highValue'])) { + $this->highValue = $vals['highValue']; + } + if (isset($vals['numNulls'])) { + $this->numNulls = $vals['numNulls']; + } + if (isset($vals['numDVs'])) { + $this->numDVs = $vals['numDVs']; + } + if (isset($vals['bitVectors'])) { + $this->bitVectors = $vals['bitVectors']; + } + } + } + + public function getName() { + return 'TimestampColumnStatsData'; + } + + public function read($input) + { + $xfer = 0; + $fname = null; + $ftype = 0; + $fid = 0; + $xfer += $input->readStructBegin($fname); + while (true) + { + $xfer += $input->readFieldBegin($fname, $ftype, $fid); + if ($ftype == TType::STOP) { + break; + } + switch ($fid) + { + case 1: + if ($ftype == TType::STRUCT) { + $this->lowValue = new \metastore\Timestamp(); + $xfer += $this->lowValue->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + case 2: + if ($ftype == TType::STRUCT) { + $this->highValue = new \metastore\Timestamp(); + $xfer += $this->highValue->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; + case 3: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->numNulls); + } else { + $xfer += $input->skip($ftype); + } + break; + case 4: + if ($ftype == TType::I64) { + $xfer += $input->readI64($this->numDVs); + } else { + $xfer += $input->skip($ftype); + } + break; + case 5: + if ($ftype == TType::STRING) { + $xfer += $input->readString($this->bitVectors); + } else { + $xfer += $input->skip($ftype); + } + break; + default: + $xfer += $input->skip($ftype); + break; + } + $xfer += $input->readFieldEnd(); + } + $xfer += $input->readStructEnd(); + return $xfer; + } + + public function write($output) { + $xfer = 0; + $xfer += $output->writeStructBegin('TimestampColumnStatsData'); + if ($this->lowValue !== null) { + if (!is_object($this->lowValue)) { + throw new TProtocolException('Bad 
type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('lowValue', TType::STRUCT, 1); + $xfer += $this->lowValue->write($output); + $xfer += $output->writeFieldEnd(); + } + if ($this->highValue !== null) { + if (!is_object($this->highValue)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('highValue', TType::STRUCT, 2); + $xfer += $this->highValue->write($output); + $xfer += $output->writeFieldEnd(); + } + if ($this->numNulls !== null) { + $xfer += $output->writeFieldBegin('numNulls', TType::I64, 3); + $xfer += $output->writeI64($this->numNulls); + $xfer += $output->writeFieldEnd(); + } + if ($this->numDVs !== null) { + $xfer += $output->writeFieldBegin('numDVs', TType::I64, 4); + $xfer += $output->writeI64($this->numDVs); + $xfer += $output->writeFieldEnd(); + } + if ($this->bitVectors !== null) { + $xfer += $output->writeFieldBegin('bitVectors', TType::STRING, 5); + $xfer += $output->writeString($this->bitVectors); + $xfer += $output->writeFieldEnd(); + } + $xfer += $output->writeFieldStop(); + $xfer += $output->writeStructEnd(); + return $xfer; + } + +} + class ColumnStatisticsData { static $_TSPEC; @@ -9887,6 +10139,10 @@ class ColumnStatisticsData { * @var \metastore\DateColumnStatsData */ public $dateStats = null; + /** + * @var \metastore\TimestampColumnStatsData + */ + public $timestampStats = null; public function __construct($vals=null) { if (!isset(self::$_TSPEC)) { @@ -9926,6 +10182,11 @@ class ColumnStatisticsData { 'type' => TType::STRUCT, 'class' => '\metastore\DateColumnStatsData', ), + 8 => array( + 'var' => 'timestampStats', + 'type' => TType::STRUCT, + 'class' => '\metastore\TimestampColumnStatsData', + ), ); } if (is_array($vals)) { @@ -9950,6 +10211,9 @@ class ColumnStatisticsData { if (isset($vals['dateStats'])) { $this->dateStats = $vals['dateStats']; } + if (isset($vals['timestampStats'])) { + $this->timestampStats = 
$vals['timestampStats']; + } } } @@ -10028,6 +10292,14 @@ class ColumnStatisticsData { $xfer += $input->skip($ftype); } break; + case 8: + if ($ftype == TType::STRUCT) { + $this->timestampStats = new \metastore\TimestampColumnStatsData(); + $xfer += $this->timestampStats->read($input); + } else { + $xfer += $input->skip($ftype); + } + break; default: $xfer += $input->skip($ftype); break; @@ -10097,6 +10369,14 @@ class ColumnStatisticsData { $xfer += $this->dateStats->write($output); $xfer += $output->writeFieldEnd(); } + if ($this->timestampStats !== null) { + if (!is_object($this->timestampStats)) { + throw new TProtocolException('Bad type in structure.', TProtocolException::INVALID_DATA); + } + $xfer += $output->writeFieldBegin('timestampStats', TType::STRUCT, 8); + $xfer += $this->timestampStats->write($output); + $xfer += $output->writeFieldEnd(); + } $xfer += $output->writeFieldStop(); $xfer += $output->writeStructEnd(); return $xfer; diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py index d4fefff62d..89adea23d7 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-py/hive_metastore/ttypes.py @@ -6795,6 +6795,196 @@ def __eq__(self, other): def __ne__(self, other): return not (self == other) +class Timestamp: + """ + Attributes: + - secondsSinceEpoch + """ + + thrift_spec = ( + None, # 0 + (1, TType.I64, 'secondsSinceEpoch', None, None, ), # 1 + ) + + def __init__(self, secondsSinceEpoch=None,): + self.secondsSinceEpoch = secondsSinceEpoch + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, 
self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.I64: + self.secondsSinceEpoch = iprot.readI64() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('Timestamp') + if self.secondsSinceEpoch is not None: + oprot.writeFieldBegin('secondsSinceEpoch', TType.I64, 1) + oprot.writeI64(self.secondsSinceEpoch) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.secondsSinceEpoch is None: + raise TProtocol.TProtocolException(message='Required field secondsSinceEpoch is unset!') + return + + + def __hash__(self): + value = 17 + value = (value * 31) ^ hash(self.secondsSinceEpoch) + return value + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + +class TimestampColumnStatsData: + """ + Attributes: + - lowValue + - highValue + - numNulls + - numDVs + - bitVectors + """ + + thrift_spec = ( + None, # 0 + (1, TType.STRUCT, 'lowValue', (Timestamp, Timestamp.thrift_spec), None, ), # 1 + (2, TType.STRUCT, 'highValue', (Timestamp, Timestamp.thrift_spec), None, ), # 2 + (3, TType.I64, 'numNulls', None, None, ), # 3 + (4, TType.I64, 'numDVs', None, None, ), # 4 + (5, TType.STRING, 'bitVectors', None, None, ), # 5 + ) + + def __init__(self, lowValue=None, highValue=None, numNulls=None, numDVs=None, 
bitVectors=None,): + self.lowValue = lowValue + self.highValue = highValue + self.numNulls = numNulls + self.numDVs = numDVs + self.bitVectors = bitVectors + + def read(self, iprot): + if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: + fastbinary.decode_binary(self, iprot.trans, (self.__class__, self.thrift_spec)) + return + iprot.readStructBegin() + while True: + (fname, ftype, fid) = iprot.readFieldBegin() + if ftype == TType.STOP: + break + if fid == 1: + if ftype == TType.STRUCT: + self.lowValue = Timestamp() + self.lowValue.read(iprot) + else: + iprot.skip(ftype) + elif fid == 2: + if ftype == TType.STRUCT: + self.highValue = Timestamp() + self.highValue.read(iprot) + else: + iprot.skip(ftype) + elif fid == 3: + if ftype == TType.I64: + self.numNulls = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 4: + if ftype == TType.I64: + self.numDVs = iprot.readI64() + else: + iprot.skip(ftype) + elif fid == 5: + if ftype == TType.STRING: + self.bitVectors = iprot.readString() + else: + iprot.skip(ftype) + else: + iprot.skip(ftype) + iprot.readFieldEnd() + iprot.readStructEnd() + + def write(self, oprot): + if oprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and self.thrift_spec is not None and fastbinary is not None: + oprot.trans.write(fastbinary.encode_binary(self, (self.__class__, self.thrift_spec))) + return + oprot.writeStructBegin('TimestampColumnStatsData') + if self.lowValue is not None: + oprot.writeFieldBegin('lowValue', TType.STRUCT, 1) + self.lowValue.write(oprot) + oprot.writeFieldEnd() + if self.highValue is not None: + oprot.writeFieldBegin('highValue', TType.STRUCT, 2) + self.highValue.write(oprot) + oprot.writeFieldEnd() + if self.numNulls is not None: + oprot.writeFieldBegin('numNulls', TType.I64, 3) + oprot.writeI64(self.numNulls) + oprot.writeFieldEnd() + if self.numDVs is not None: + 
oprot.writeFieldBegin('numDVs', TType.I64, 4) + oprot.writeI64(self.numDVs) + oprot.writeFieldEnd() + if self.bitVectors is not None: + oprot.writeFieldBegin('bitVectors', TType.STRING, 5) + oprot.writeString(self.bitVectors) + oprot.writeFieldEnd() + oprot.writeFieldStop() + oprot.writeStructEnd() + + def validate(self): + if self.numNulls is None: + raise TProtocol.TProtocolException(message='Required field numNulls is unset!') + if self.numDVs is None: + raise TProtocol.TProtocolException(message='Required field numDVs is unset!') + return + + + def __hash__(self): + value = 17 + value = (value * 31) ^ hash(self.lowValue) + value = (value * 31) ^ hash(self.highValue) + value = (value * 31) ^ hash(self.numNulls) + value = (value * 31) ^ hash(self.numDVs) + value = (value * 31) ^ hash(self.bitVectors) + return value + + def __repr__(self): + L = ['%s=%r' % (key, value) + for key, value in self.__dict__.iteritems()] + return '%s(%s)' % (self.__class__.__name__, ', '.join(L)) + + def __eq__(self, other): + return isinstance(other, self.__class__) and self.__dict__ == other.__dict__ + + def __ne__(self, other): + return not (self == other) + class ColumnStatisticsData: """ Attributes: @@ -6805,6 +6995,7 @@ class ColumnStatisticsData: - binaryStats - decimalStats - dateStats + - timestampStats """ thrift_spec = ( @@ -6816,9 +7007,10 @@ class ColumnStatisticsData: (5, TType.STRUCT, 'binaryStats', (BinaryColumnStatsData, BinaryColumnStatsData.thrift_spec), None, ), # 5 (6, TType.STRUCT, 'decimalStats', (DecimalColumnStatsData, DecimalColumnStatsData.thrift_spec), None, ), # 6 (7, TType.STRUCT, 'dateStats', (DateColumnStatsData, DateColumnStatsData.thrift_spec), None, ), # 7 + (8, TType.STRUCT, 'timestampStats', (TimestampColumnStatsData, TimestampColumnStatsData.thrift_spec), None, ), # 8 ) - def __init__(self, booleanStats=None, longStats=None, doubleStats=None, stringStats=None, binaryStats=None, decimalStats=None, dateStats=None,): + def __init__(self, 
booleanStats=None, longStats=None, doubleStats=None, stringStats=None, binaryStats=None, decimalStats=None, dateStats=None, timestampStats=None,): self.booleanStats = booleanStats self.longStats = longStats self.doubleStats = doubleStats @@ -6826,6 +7018,7 @@ def __init__(self, booleanStats=None, longStats=None, doubleStats=None, stringSt self.binaryStats = binaryStats self.decimalStats = decimalStats self.dateStats = dateStats + self.timestampStats = timestampStats def read(self, iprot): if iprot.__class__ == TBinaryProtocol.TBinaryProtocolAccelerated and isinstance(iprot.trans, TTransport.CReadableTransport) and self.thrift_spec is not None and fastbinary is not None: @@ -6878,6 +7071,12 @@ def read(self, iprot): self.dateStats.read(iprot) else: iprot.skip(ftype) + elif fid == 8: + if ftype == TType.STRUCT: + self.timestampStats = TimestampColumnStatsData() + self.timestampStats.read(iprot) + else: + iprot.skip(ftype) else: iprot.skip(ftype) iprot.readFieldEnd() @@ -6916,6 +7115,10 @@ def write(self, oprot): oprot.writeFieldBegin('dateStats', TType.STRUCT, 7) self.dateStats.write(oprot) oprot.writeFieldEnd() + if self.timestampStats is not None: + oprot.writeFieldBegin('timestampStats', TType.STRUCT, 8) + self.timestampStats.write(oprot) + oprot.writeFieldEnd() oprot.writeFieldStop() oprot.writeStructEnd() @@ -6932,6 +7135,7 @@ def __hash__(self): value = (value * 31) ^ hash(self.binaryStats) value = (value * 31) ^ hash(self.decimalStats) value = (value * 31) ^ hash(self.dateStats) + value = (value * 31) ^ hash(self.timestampStats) return value def __repr__(self): diff --git a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb index 4863de6de2..13b05bdf22 100644 --- a/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb +++ b/standalone-metastore/metastore-common/src/gen/thrift/gen-rb/hive_metastore_types.rb @@ 
-1510,6 +1510,49 @@ class DateColumnStatsData ::Thrift::Struct.generate_accessors self end +class Timestamp + include ::Thrift::Struct, ::Thrift::Struct_Union + SECONDSSINCEEPOCH = 1 + + FIELDS = { + SECONDSSINCEEPOCH => {:type => ::Thrift::Types::I64, :name => 'secondsSinceEpoch'} + } + + def struct_fields; FIELDS; end + + def validate + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field secondsSinceEpoch is unset!') unless @secondsSinceEpoch + end + + ::Thrift::Struct.generate_accessors self +end + +class TimestampColumnStatsData + include ::Thrift::Struct, ::Thrift::Struct_Union + LOWVALUE = 1 + HIGHVALUE = 2 + NUMNULLS = 3 + NUMDVS = 4 + BITVECTORS = 5 + + FIELDS = { + LOWVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'lowValue', :class => ::Timestamp, :optional => true}, + HIGHVALUE => {:type => ::Thrift::Types::STRUCT, :name => 'highValue', :class => ::Timestamp, :optional => true}, + NUMNULLS => {:type => ::Thrift::Types::I64, :name => 'numNulls'}, + NUMDVS => {:type => ::Thrift::Types::I64, :name => 'numDVs'}, + BITVECTORS => {:type => ::Thrift::Types::STRING, :name => 'bitVectors', :binary => true, :optional => true} + } + + def struct_fields; FIELDS; end + + def validate + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numNulls is unset!') unless @numNulls + raise ::Thrift::ProtocolException.new(::Thrift::ProtocolException::UNKNOWN, 'Required field numDVs is unset!') unless @numDVs + end + + ::Thrift::Struct.generate_accessors self +end + class ColumnStatisticsData < ::Thrift::Union include ::Thrift::Struct_Union class << self @@ -1540,6 +1583,10 @@ class ColumnStatisticsData < ::Thrift::Union def dateStats(val) ColumnStatisticsData.new(:dateStats, val) end + + def timestampStats(val) + ColumnStatisticsData.new(:timestampStats, val) + end end BOOLEANSTATS = 1 @@ -1549,6 +1596,7 @@ class ColumnStatisticsData < ::Thrift::Union BINARYSTATS = 5 DECIMALSTATS = 6 
DATESTATS = 7 + TIMESTAMPSTATS = 8 FIELDS = { BOOLEANSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'booleanStats', :class => ::BooleanColumnStatsData}, @@ -1557,7 +1605,8 @@ class ColumnStatisticsData < ::Thrift::Union STRINGSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'stringStats', :class => ::StringColumnStatsData}, BINARYSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'binaryStats', :class => ::BinaryColumnStatsData}, DECIMALSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'decimalStats', :class => ::DecimalColumnStatsData}, - DATESTATS => {:type => ::Thrift::Types::STRUCT, :name => 'dateStats', :class => ::DateColumnStatsData} + DATESTATS => {:type => ::Thrift::Types::STRUCT, :name => 'dateStats', :class => ::DateColumnStatsData}, + TIMESTAMPSTATS => {:type => ::Thrift::Types::STRUCT, :name => 'timestampStats', :class => ::TimestampColumnStatsData} } def struct_fields; FIELDS; end diff --git a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift index 449393c73c..7b5b5fba32 100644 --- a/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift +++ b/standalone-metastore/metastore-common/src/main/thrift/hive_metastore.thrift @@ -562,6 +562,18 @@ struct DateColumnStatsData { 5: optional binary bitVectors } +struct Timestamp { +1: required i64 secondsSinceEpoch +} + +struct TimestampColumnStatsData { +1: optional Timestamp lowValue, +2: optional Timestamp highValue, +3: required i64 numNulls, +4: required i64 numDVs, +5: optional binary bitVectors +} + union ColumnStatisticsData { 1: BooleanColumnStatsData booleanStats, 2: LongColumnStatsData longStats, @@ -569,7 +581,8 @@ union ColumnStatisticsData { 4: StringColumnStatsData stringStats, 5: BinaryColumnStatsData binaryStats, 6: DecimalColumnStatsData decimalStats, -7: DateColumnStatsData dateStats +7: DateColumnStatsData dateStats, +8: TimestampColumnStatsData 
timestampStats } struct ColumnStatisticsObj { diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java index e8d197abb4..bc177e7347 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java @@ -37,12 +37,15 @@ import org.apache.hadoop.hive.metastore.api.MetaException; import org.apache.hadoop.hive.metastore.api.NoSuchObjectException; import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Timestamp; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; import org.apache.hadoop.hive.metastore.api.utils.DecimalUtils; import org.apache.hadoop.hive.metastore.columnstats.cache.DateColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.model.MPartition; import org.apache.hadoop.hive.metastore.model.MPartitionColumnStatistics; import org.apache.hadoop.hive.metastore.model.MTable; @@ -126,6 +129,14 @@ public static MTableColumnStatistics convertToMTableColumnStatistics(MTable tabl dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? 
dateStats.getHighValue().getDaysSinceEpoch() : null); + } else if (statsObj.getStatsData().isSetTimestampStats()) { + TimestampColumnStatsData timestampStats = statsObj.getStatsData().getTimestampStats(); + mColStats.setTimestampStats( + timestampStats.isSetNumNulls() ? timestampStats.getNumNulls() : null, + timestampStats.isSetNumDVs() ? timestampStats.getNumDVs() : null, + timestampStats.isSetBitVectors() ? timestampStats.getBitVectors() : null, + timestampStats.isSetLowValue() ? timestampStats.getLowValue().getSecondsSinceEpoch() : null, + timestampStats.isSetHighValue() ? timestampStats.getHighValue().getSecondsSinceEpoch() : null); } mColStats.setEngine(engine); return mColStats; @@ -251,8 +262,7 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( binaryStats.setMaxColLen(mStatsObj.getMaxColLen()); colStatsData.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || - colType.equals("smallint") || colType.equals("tinyint") || - colType.equals("timestamp")) { + colType.equals("smallint") || colType.equals("tinyint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(mStatsObj.getNumNulls()); Long longHighValue = mStatsObj.getLongHighValue(); @@ -308,6 +318,20 @@ public static ColumnStatisticsObj getTableColumnStatisticsObj( dateStats.setNumDVs(mStatsObj.getNumDVs()); dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(mStatsObj.getNumNulls()); + Long highValue = mStatsObj.getLongHighValue(); + if (highValue != null) { + timestampStats.setHighValue(new Timestamp(highValue)); + } + Long lowValue = mStatsObj.getLongLowValue(); + if (lowValue != null) { + timestampStats.setLowValue(new Timestamp(lowValue)); + } + timestampStats.setNumDVs(mStatsObj.getNumDVs()); + timestampStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); + colStatsData.setTimestampStats(timestampStats); } statsObj.setStatsData(colStatsData); return statsObj; @@ -394,6 +418,14 @@ public static MPartitionColumnStatistics convertToMPartitionColumnStatistics( dateStats.isSetBitVectors() ? dateStats.getBitVectors() : null, dateStats.isSetLowValue() ? dateStats.getLowValue().getDaysSinceEpoch() : null, dateStats.isSetHighValue() ? dateStats.getHighValue().getDaysSinceEpoch() : null); + } else if (statsObj.getStatsData().isSetTimestampStats()) { + TimestampColumnStatsData timestampStats = statsObj.getStatsData().getTimestampStats(); + mColStats.setTimestampStats( + timestampStats.isSetNumNulls() ? timestampStats.getNumNulls() : null, + timestampStats.isSetNumDVs() ? timestampStats.getNumDVs() : null, + timestampStats.isSetBitVectors() ? timestampStats.getBitVectors() : null, + timestampStats.isSetLowValue() ? timestampStats.getLowValue().getSecondsSinceEpoch() : null, + timestampStats.isSetHighValue() ? 
timestampStats.getHighValue().getSecondsSinceEpoch() : null); } mColStats.setEngine(engine); return mColStats; @@ -429,8 +461,7 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( binaryStats.setMaxColLen(mStatsObj.getMaxColLen()); colStatsData.setBinaryStats(binaryStats); } else if (colType.equals("tinyint") || colType.equals("smallint") || - colType.equals("int") || colType.equals("bigint") || - colType.equals("timestamp")) { + colType.equals("int") || colType.equals("bigint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(mStatsObj.getNumNulls()); if (mStatsObj.getLongHighValue() != null) { @@ -480,6 +511,20 @@ public static ColumnStatisticsObj getPartitionColumnStatisticsObj( dateStats.setNumDVs(mStatsObj.getNumDVs()); dateStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? null : mStatsObj.getBitVector()); colStatsData.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(mStatsObj.getNumNulls()); + Long highValue = mStatsObj.getLongHighValue(); + if (highValue != null) { + timestampStats.setHighValue(new Timestamp(highValue)); + } + Long lowValue = mStatsObj.getLongLowValue(); + if (lowValue != null) { + timestampStats.setLowValue(new Timestamp(lowValue)); + } + timestampStats.setNumDVs(mStatsObj.getNumDVs()); + timestampStats.setBitVectors((mStatsObj.getBitVector()==null||!enableBitVector)? 
null : mStatsObj.getBitVector()); + colStatsData.setTimestampStats(timestampStats); } statsObj.setStatsData(colStatsData); return statsObj; @@ -524,8 +569,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen)); data.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || - colType.equals("smallint") || colType.equals("tinyint") || - colType.equals("timestamp")) { + colType.equals("smallint") || colType.equals("tinyint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); if (lhigh != null) { @@ -573,6 +617,18 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData dateStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist)); dateStats.setBitVectors(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)); data.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); + if (lhigh != null) { + timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh))); + } + if (llow != null) { + timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow))); + } + timestampStats.setNumDVs(MetastoreDirectSqlUtils.extractSqlLong(dist)); + timestampStats.setBitVectors(MetastoreDirectSqlUtils.extractSqlBlob(bitVector)); + data.setTimestampStats(timestampStats); } } @@ -604,7 +660,7 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData binaryStats.setMaxColLen(MetastoreDirectSqlUtils.extractSqlLong(maxlen)); data.setBinaryStats(binaryStats); } else if (colType.equals("bigint") || colType.equals("int") || colType.equals("smallint") - || colType.equals("tinyint") || 
colType.equals("timestamp")) { + || colType.equals("tinyint")) { LongColumnStatsDataInspector longStats = new LongColumnStatsDataInspector(); longStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); if (lhigh != null) { @@ -674,6 +730,41 @@ public static void fillColumnStatisticsData(String colType, ColumnStatisticsData estimation = Math.min(estimation, rangeBound); dateStats.setNumDVs(estimation); data.setDateStats(dateStats); + } else if (colType.equals("timestamp")) { + TimestampColumnStatsDataInspector timestampStats = new TimestampColumnStatsDataInspector(); + timestampStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); + if (lhigh != null) { + timestampStats.setHighValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(lhigh))); + } + if (llow != null) { + timestampStats.setLowValue(new Timestamp(MetastoreDirectSqlUtils.extractSqlLong(llow))); + } + long lowerBound = MetastoreDirectSqlUtils.extractSqlLong(dist); + long higherBound = MetastoreDirectSqlUtils.extractSqlLong(sumDist); + long rangeBound = Long.MAX_VALUE; + if (lhigh != null && llow != null) { + rangeBound = MetastoreDirectSqlUtils.extractSqlLong(lhigh) + - MetastoreDirectSqlUtils.extractSqlLong(llow) + 1; + } + long estimation; + if (useDensityFunctionForNDVEstimation && lhigh != null && llow != null && avgLong != null + && MetastoreDirectSqlUtils.extractSqlDouble(avgLong) != 0.0) { + // We have estimation, lowerbound and higherbound. We use estimation if + // it is between lowerbound and higherbound. 
+ estimation = MetastoreDirectSqlUtils + .extractSqlLong((MetastoreDirectSqlUtils.extractSqlLong(lhigh) - MetastoreDirectSqlUtils + .extractSqlLong(llow)) / MetastoreDirectSqlUtils.extractSqlDouble(avgLong)); + if (estimation < lowerBound) { + estimation = lowerBound; + } else if (estimation > higherBound) { + estimation = higherBound; + } + } else { + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); + } + estimation = Math.min(estimation, rangeBound); + timestampStats.setNumDVs(estimation); + data.setTimestampStats(timestampStats); } else if (colType.equals("double") || colType.equals("float")) { DoubleColumnStatsDataInspector doubleStats = new DoubleColumnStatsDataInspector(); doubleStats.setNumNulls(MetastoreDirectSqlUtils.extractSqlLong(nulls)); @@ -888,6 +979,26 @@ public static void setFieldsIntoOldStats(ColumnStatisticsObj oldStatObj, } break; } + case TIMESTAMP_STATS: { + TimestampColumnStatsData oldTimestampStatsData = oldStatObj.getStatsData().getTimestampStats(); + TimestampColumnStatsData newTimestampStatsData = newStatObj.getStatsData().getTimestampStats(); + if (newTimestampStatsData.isSetHighValue()) { + oldTimestampStatsData.setHighValue(newTimestampStatsData.getHighValue()); + } + if (newTimestampStatsData.isSetLowValue()) { + oldTimestampStatsData.setLowValue(newTimestampStatsData.getLowValue()); + } + if (newTimestampStatsData.isSetNumNulls()) { + oldTimestampStatsData.setNumNulls(newTimestampStatsData.getNumNulls()); + } + if (newTimestampStatsData.isSetNumDVs()) { + oldTimestampStatsData.setNumDVs(newTimestampStatsData.getNumDVs()); + } + if (newTimestampStatsData.isSetBitVectors()) { + oldTimestampStatsData.setBitVectors(newTimestampStatsData.getBitVectors()); + } + break; + } default: throw new IllegalArgumentException("Unknown stats type: " + typeNew.toString()); } diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java 
b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java index 2d6d2261f7..dee750d99e 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/ColumnsStatsUtils.java @@ -23,6 +23,7 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DecimalColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; /** @@ -48,6 +49,22 @@ public static DateColumnStatsDataInspector dateInspectorFromStats(ColumnStatisti return dateColumnStats; } + /** + * Returns the timestamp stats of {@code cso} as a TimestampColumnStatsDataInspector. + * @param cso column statistics object; its stats data is read through getTimestampStats() + * @return the stored object itself when it already is an inspector, otherwise a new inspector built from it + */ + public static TimestampColumnStatsDataInspector timestampInspectorFromStats(ColumnStatisticsObj cso) { + TimestampColumnStatsDataInspector timestampColumnStats; + if (cso.getStatsData().getTimestampStats() instanceof TimestampColumnStatsDataInspector) { + timestampColumnStats = + (TimestampColumnStatsDataInspector)(cso.getStatsData().getTimestampStats()); + } else { + timestampColumnStats = new TimestampColumnStatsDataInspector(cso.getStatsData().getTimestampStats()); + } + return timestampColumnStats; + } + /** * Convertes to StringColumnStatsDataInspector * if it's a StringColumnStatsData.
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java index 7aaab4a6b9..c2d14dfccc 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/ColumnStatsAggregatorFactory.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; public class ColumnStatsAggregatorFactory { @@ -48,6 +49,9 @@ public static ColumnStatsAggregator getColumnStatsAggregator(_Fields type, case DATE_STATS: agg = new DateColumnStatsAggregator(); break; + case TIMESTAMP_STATS: + agg = new TimestampColumnStatsAggregator(); + break; case DOUBLE_STATS: agg = new DoubleColumnStatsAggregator(); break; @@ -86,6 +90,10 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col csd.setDateStats(new DateColumnStatsDataInspector()); break; + case TIMESTAMP_STATS: + csd.setTimestampStats(new TimestampColumnStatsDataInspector()); + break; + case DOUBLE_STATS: csd.setDoubleStats(new DoubleColumnStatsDataInspector()); break; diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java new file mode 100644 index 
0000000000..8828f89ebf --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/aggr/TimestampColumnStatsAggregator.java @@ -0,0 +1,369 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License.
+ */ + +package org.apache.hadoop.hive.metastore.columnstats.aggr; + +import java.util.Collections; +import java.util.Comparator; +import java.util.HashMap; +import java.util.LinkedList; +import java.util.List; +import java.util.Map; + +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.Timestamp; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; +import org.apache.hadoop.hive.metastore.api.MetaException; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.merge.TimestampColumnStatsMerger; +import org.apache.hadoop.hive.metastore.utils.MetaStoreServerUtils.ColStatsObjWithSourceInfo; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.timestampInspectorFromStats; + +/** + * Aggregates per-partition timestamp column statistics into a single ColumnStatisticsObj, + * extrapolating when stats are supplied for only some (at least two) of the requested partitions. + */ +public class TimestampColumnStatsAggregator extends ColumnStatsAggregator implements + IExtrapolatePartStatus { + + private static final Logger LOG = LoggerFactory.getLogger(TimestampColumnStatsAggregator.class); + + @Override + public ColumnStatisticsObj aggregate(List colStatsWithSourceInfo, + List partNames, boolean areAllPartsFound) throws MetaException { + ColumnStatisticsObj statsObj = null; + String colType = null; + String colName = null; + // check if all the ColumnStatisticsObjs contain stats and all the ndv are + // bitvectors + boolean doAllPartitionContainStats = partNames.size() == colStatsWithSourceInfo.size(); + NumDistinctValueEstimator ndvEstimator = null; + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + if (statsObj == null) { + colName =
cso.getColName(); + colType = cso.getColType(); + statsObj = ColumnStatsAggregatorFactory.newColumnStaticsObj(colName, colType, + cso.getStatsData().getSetField()); + LOG.trace("doAllPartitionContainStats for column: {} is: {}", colName, doAllPartitionContainStats); + } + TimestampColumnStatsDataInspector timestampColumnStats = timestampInspectorFromStats(cso); + + if (timestampColumnStats.getNdvEstimator() == null) { + ndvEstimator = null; + break; + } else { + // check if all of the bit vectors can merge + NumDistinctValueEstimator estimator = timestampColumnStats.getNdvEstimator(); + if (ndvEstimator == null) { + ndvEstimator = estimator; + } else { + if (ndvEstimator.canMerge(estimator)) { + continue; + } else { + ndvEstimator = null; + break; + } + } + } + } + if (ndvEstimator != null) { + ndvEstimator = NumDistinctValueEstimatorFactory + .getEmptyNumDistinctValueEstimator(ndvEstimator); + } + LOG.debug("all of the bit vectors can merge for " + colName + " is " + (ndvEstimator != null)); + ColumnStatisticsData columnStatisticsData = new ColumnStatisticsData(); + if (doAllPartitionContainStats || colStatsWithSourceInfo.size() < 2) { + TimestampColumnStatsDataInspector aggregateData = null; + long lowerBound = 0; + long higherBound = 0; + double densityAvgSum = 0.0; + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(cso); + // the largest per-partition NDV is a lower bound for the aggregated NDV + lowerBound = Math.max(lowerBound, newData.getNumDVs()); + higherBound += newData.getNumDVs(); + if (newData.isSetLowValue() && newData.isSetHighValue()) { + // divide as double: long division would truncate the density + densityAvgSum += ((double) diff(newData.getHighValue(), newData.getLowValue())) / newData.getNumDVs(); + } + if (ndvEstimator != null) { + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); + } + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + TimestampColumnStatsMerger merger = new TimestampColumnStatsMerger(); + merger.setLowValue(aggregateData, newData); +
merger.setHighValue(aggregateData, newData); + + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs())); + } + } + if (ndvEstimator != null) { + // if all the ColumnStatisticsObjs contain bitvectors, we do not need to + // use uniform distribution assumption because we can merge bitvectors + // to get a good estimation. + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + } else { + long estimation; + if (useDensityFunctionForNDVEstimation) { + // We have estimation, lowerbound and higherbound. We use estimation + // if it is between lowerbound and higherbound. + double densityAvg = densityAvgSum / partNames.size(); + estimation = (long) (diff(aggregateData.getHighValue(), aggregateData.getLowValue()) / densityAvg); + if (estimation < lowerBound) { + estimation = lowerBound; + } else if (estimation > higherBound) { + estimation = higherBound; + } + } else { + estimation = (long) (lowerBound + (higherBound - lowerBound) * ndvTuner); + } + aggregateData.setNumDVs(estimation); + } + columnStatisticsData.setTimestampStats(aggregateData); + } else { + // we need extrapolation + LOG.debug("start extrapolation for " + colName); + + Map indexMap = new HashMap<>(); + for (int index = 0; index < partNames.size(); index++) { + indexMap.put(partNames.get(index), index); + } + Map adjustedIndexMap = new HashMap<>(); + Map adjustedStatsMap = new HashMap<>(); + // while we scan the css, we also get the densityAvg, lowerbound and + // higherbound when useDensityFunctionForNDVEstimation is true. + double densityAvgSum = 0.0; + if (ndvEstimator == null) { + // if not every partition uses bitvector for ndv, we just fall back to + // the traditional extrapolation methods.
+ for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + String partName = csp.getPartName(); + TimestampColumnStatsData newData = cso.getStatsData().getTimestampStats(); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += ((double) diff(newData.getHighValue(), newData.getLowValue())) / newData.getNumDVs(); + } + adjustedIndexMap.put(partName, (double) indexMap.get(partName)); + adjustedStatsMap.put(partName, cso.getStatsData()); + } + } else { + // we first merge all the adjacent bitvectors that we could merge and + // derive new partition names and index. + StringBuilder pseudoPartName = new StringBuilder(); + double pseudoIndexSum = 0; + int length = 0; + int curIndex = -1; + TimestampColumnStatsDataInspector aggregateData = null; + for (ColStatsObjWithSourceInfo csp : colStatsWithSourceInfo) { + ColumnStatisticsObj cso = csp.getColStatsObj(); + String partName = csp.getPartName(); + TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(cso); + // newData.isSetBitVectors() should be true for sure because we + // already checked it before. + if (indexMap.get(partName) != curIndex) { + // There is bitvector, but it is not adjacent to the previous ones.
+ if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setTimestampStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += ((double) diff(aggregateData.getHighValue(), aggregateData.getLowValue())) + / aggregateData.getNumDVs(); + } + // reset everything + pseudoPartName = new StringBuilder(); + pseudoIndexSum = 0; + length = 0; + ndvEstimator = NumDistinctValueEstimatorFactory.getEmptyNumDistinctValueEstimator(ndvEstimator); + } + aggregateData = null; + } + curIndex = indexMap.get(partName); + pseudoPartName.append(partName); + pseudoIndexSum += curIndex; + length++; + curIndex++; + if (aggregateData == null) { + aggregateData = newData.deepCopy(); + } else { + aggregateData.setLowValue(min(aggregateData.getLowValue(), newData.getLowValue())); + aggregateData.setHighValue(max(aggregateData.getHighValue(), newData.getHighValue())); + aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls()); + } + ndvEstimator.mergeEstimators(newData.getNdvEstimator()); + } + if (length > 0) { + // we have to set ndv + adjustedIndexMap.put(pseudoPartName.toString(), pseudoIndexSum / length); + aggregateData.setNumDVs(ndvEstimator.estimateNumDistinctValues()); + ColumnStatisticsData csd = new ColumnStatisticsData(); + csd.setTimestampStats(aggregateData); + adjustedStatsMap.put(pseudoPartName.toString(), csd); + if (useDensityFunctionForNDVEstimation) { + densityAvgSum += ((double) diff(aggregateData.getHighValue(), aggregateData.getLowValue())) + / aggregateData.getNumDVs(); + } + } + } + extrapolate(columnStatisticsData, partNames.size(), colStatsWithSourceInfo.size(), + adjustedIndexMap, adjustedStatsMap, densityAvgSum / adjustedStatsMap.size()); + } + LOG.debug( + "Ndv estimatation for {} is {} # of
partitions requested: {} # of partitions found: {}", + colName, columnStatisticsData.getTimestampStats().getNumDVs(), partNames.size(), + colStatsWithSourceInfo.size()); + statsObj.setStatsData(columnStatisticsData); + return statsObj; + } + + private long diff(Timestamp d1, Timestamp d2) { + return d1.getSecondsSinceEpoch() - d2.getSecondsSinceEpoch(); + } + + private Timestamp min(Timestamp d1, Timestamp d2) { + return d1.compareTo(d2) < 0 ? d1 : d2; + } + + private Timestamp max(Timestamp d1, Timestamp d2) { + return d1.compareTo(d2) < 0 ? d2 : d1; + } + + @Override + public void extrapolate(ColumnStatisticsData extrapolateData, int numParts, + int numPartsWithStats, Map adjustedIndexMap, + Map adjustedStatsMap, double densityAvg) { + int rightBorderInd = numParts; + TimestampColumnStatsDataInspector extrapolateTimestampData = new TimestampColumnStatsDataInspector(); + Map extractedAdjustedStatsMap = new HashMap<>(); + for (Map.Entry entry : adjustedStatsMap.entrySet()) { + extractedAdjustedStatsMap.put(entry.getKey(), entry.getValue().getTimestampStats()); + } + List> list = new LinkedList<>( + extractedAdjustedStatsMap.entrySet()); + // get the lowValue + Collections.sort(list, new Comparator>() { + @Override + public int compare(Map.Entry o1, + Map.Entry o2) { + return o1.getValue().getLowValue().compareTo(o2.getValue().getLowValue()); + } + }); + double minInd = adjustedIndexMap.get(list.get(0).getKey()); + double maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + long lowValue = 0; + long min = list.get(0).getValue().getLowValue().getSecondsSinceEpoch(); + long max = list.get(list.size() - 1).getValue().getLowValue().getSecondsSinceEpoch(); + if (minInd == maxInd) { + lowValue = min; + } else if (minInd < maxInd) { + // left border is the min + lowValue = (long) (max - (max - min) * maxInd / (maxInd - minInd)); + } else { + // right border is the min + lowValue = (long) (max - (max - min) * (rightBorderInd - maxInd) / (minInd - maxInd)); +
} + + // get the highValue + Collections.sort(list, new Comparator>() { + @Override + public int compare(Map.Entry o1, + Map.Entry o2) { + return o1.getValue().getHighValue().compareTo(o2.getValue().getHighValue()); + } + }); + minInd = adjustedIndexMap.get(list.get(0).getKey()); + maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + long highValue = 0; + min = list.get(0).getValue().getHighValue().getSecondsSinceEpoch(); + max = list.get(list.size() - 1).getValue().getHighValue().getSecondsSinceEpoch(); + if (minInd == maxInd) { + highValue = min; + } else if (minInd < maxInd) { + // right border is the max + highValue = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); + } else { + // left border is the max + highValue = (long) (min + (max - min) * minInd / (minInd - maxInd)); + } + + // get the #nulls + long numNulls = 0; + for (Map.Entry entry : extractedAdjustedStatsMap.entrySet()) { + numNulls += entry.getValue().getNumNulls(); + } + // we scale up sumNulls based on the number of partitions + numNulls = numNulls * numParts / numPartsWithStats; + + // get the ndv + long ndv = 0; + Collections.sort(list, new Comparator>() { + @Override + public int compare(Map.Entry o1, + Map.Entry o2) { + return Long.compare(o1.getValue().getNumDVs(), o2.getValue().getNumDVs()); + } + }); + long lowerBound = list.get(list.size() - 1).getValue().getNumDVs(); + long higherBound = 0; + for (Map.Entry entry : list) { + higherBound += entry.getValue().getNumDVs(); + } + if (useDensityFunctionForNDVEstimation && densityAvg != 0.0) { + ndv = (long) ((highValue - lowValue) / densityAvg); + if (ndv < lowerBound) { + ndv = lowerBound; + } else if (ndv > higherBound) { + ndv = higherBound; + } + } else { + minInd = adjustedIndexMap.get(list.get(0).getKey()); + maxInd = adjustedIndexMap.get(list.get(list.size() - 1).getKey()); + min = list.get(0).getValue().getNumDVs(); + max = list.get(list.size() - 1).getValue().getNumDVs(); + if (minInd ==
maxInd) { + ndv = min; + } else if (minInd < maxInd) { + // right border is the max + ndv = (long) (min + (max - min) * (rightBorderInd - minInd) / (maxInd - minInd)); + } else { + // left border is the max + ndv = (long) (min + (max - min) * minInd / (minInd - maxInd)); + } + } + extrapolateTimestampData.setLowValue(new Timestamp(lowValue)); + extrapolateTimestampData.setHighValue(new Timestamp(highValue)); + extrapolateTimestampData.setNumNulls(numNulls); + extrapolateTimestampData.setNumDVs(ndv); + extrapolateData.setTimestampStats(extrapolateTimestampData); + } +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/TimestampColumnStatsDataInspector.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/TimestampColumnStatsDataInspector.java new file mode 100644 index 0000000000..30b22b8292 --- /dev/null +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/cache/TimestampColumnStatsDataInspector.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.
+ */ +package org.apache.hadoop.hive.metastore.columnstats.cache; + +import java.nio.ByteBuffer; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator; +import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimatorFactory; +import org.apache.hadoop.hive.metastore.api.TimestampColumnStatsData; + +@SuppressWarnings("serial") +public class TimestampColumnStatsDataInspector extends TimestampColumnStatsData { + + private NumDistinctValueEstimator ndvEstimator; + + public TimestampColumnStatsDataInspector() { + super(); + } + + public TimestampColumnStatsDataInspector(long numNulls, long numDVs) { + super(numNulls, numDVs); + } + + public TimestampColumnStatsDataInspector(TimestampColumnStatsDataInspector other) { + super(other); + if (other.ndvEstimator != null) { + super.setBitVectors(other.ndvEstimator.serialize()); // this.ndvEstimator is still null here, so read the estimator from other + } + } + + public TimestampColumnStatsDataInspector(TimestampColumnStatsData other) { + super(other); + } + + @Override + public TimestampColumnStatsDataInspector deepCopy() { + return new TimestampColumnStatsDataInspector(this); + } + + @Override + public byte[] getBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.getBitVectors(); + } + + @Override + public ByteBuffer bufferForBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.bufferForBitVectors(); + } + + @Override + public void setBitVectors(byte[] bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void setBitVectors(ByteBuffer bitVectors) { + super.setBitVectors(bitVectors); + this.ndvEstimator = null; + } + + @Override + public void unsetBitVectors() { + super.unsetBitVectors(); + this.ndvEstimator = null; + } + + @Override + public boolean isSetBitVectors() { + if (ndvEstimator != null) { + updateBitVectors(); + } + return super.isSetBitVectors(); + } + + @Override + public void setBitVectorsIsSet(boolean value) { + if (ndvEstimator != null) { +
updateBitVectors(); + } + super.setBitVectorsIsSet(value); + } + + public NumDistinctValueEstimator getNdvEstimator() { + if (isSetBitVectors() && getBitVectors().length != 0) { + updateNdvEstimator(); + } + return ndvEstimator; + } + + public void setNdvEstimator(NumDistinctValueEstimator ndvEstimator) { + super.unsetBitVectors(); + this.ndvEstimator = ndvEstimator; + } + + private void updateBitVectors() { + super.setBitVectors(ndvEstimator.serialize()); + this.ndvEstimator = null; + } + + private void updateNdvEstimator() { + this.ndvEstimator = NumDistinctValueEstimatorFactory + .getNumDistinctValueEstimator(super.getBitVectors()); + super.unsetBitVectors(); + } + +} diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java index 64d07c77b7..261437bcf1 100644 --- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java +++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/ColumnStatsMergerFactory.java @@ -29,6 +29,7 @@ import org.apache.hadoop.hive.metastore.columnstats.cache.DoubleColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.LongColumnStatsDataInspector; import org.apache.hadoop.hive.metastore.columnstats.cache.StringColumnStatsDataInspector; +import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector; public class ColumnStatsMergerFactory { @@ -69,6 +70,10 @@ public static ColumnStatsMerger getColumnStatsMerger(ColumnStatisticsObj statsOb agg = new DateColumnStatsMerger(); break; } + case TIMESTAMP_STATS: { + agg = new TimestampColumnStatsMerger(); + break; + } default: throw new IllegalArgumentException("Unknown stats type " +
statsObjNew.getStatsData().getSetField());
     }
@@ -109,6 +114,10 @@ public static ColumnStatisticsObj newColumnStaticsObj(String colName, String col
       csd.setDateStats(new DateColumnStatsDataInspector());
       break;
 
+    case TIMESTAMP_STATS:
+      csd.setTimestampStats(new TimestampColumnStatsDataInspector());
+      break;
+
     default:
       throw new IllegalArgumentException("Unknown stats type");
     }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
new file mode 100644
index 0000000000..77827d98e4
--- /dev/null
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/columnstats/merge/TimestampColumnStatsMerger.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.hadoop.hive.metastore.columnstats.merge;
+
+import static org.apache.hadoop.hive.metastore.columnstats.ColumnsStatsUtils.timestampInspectorFromStats;
+
+import org.apache.hadoop.hive.common.ndv.NumDistinctValueEstimator;
+import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj;
+import org.apache.hadoop.hive.metastore.api.Timestamp;
+import org.apache.hadoop.hive.metastore.columnstats.cache.TimestampColumnStatsDataInspector;
+
+/**
+ * Merges the timestamp column statistics of one {@link ColumnStatisticsObj} into another.
+ */
+public class TimestampColumnStatsMerger extends ColumnStatsMerger {
+  /**
+   * Widens the low/high values, sums the null counts and combines the NDV of both inputs,
+   * then stores the merged timestamp statistics back into {@code aggregateColStats}.
+   */
+  @Override
+  public void merge(ColumnStatisticsObj aggregateColStats, ColumnStatisticsObj newColStats) {
+    TimestampColumnStatsDataInspector aggregateData = timestampInspectorFromStats(aggregateColStats);
+    TimestampColumnStatsDataInspector newData = timestampInspectorFromStats(newColStats);
+
+    setLowValue(aggregateData, newData);
+    setHighValue(aggregateData, newData);
+
+    aggregateData.setNumNulls(aggregateData.getNumNulls() + newData.getNumNulls());
+
+    // Fetch each estimator once: every getNdvEstimator() call may rebuild the estimator from
+    // serialized bit vectors. The second lookup is skipped when the first yields null.
+    NumDistinctValueEstimator oldEst = aggregateData.getNdvEstimator();
+    NumDistinctValueEstimator newEst = oldEst == null ? null : newData.getNdvEstimator();
+    if (oldEst == null || newEst == null) {
+      aggregateData.setNumDVs(Math.max(aggregateData.getNumDVs(), newData.getNumDVs()));
+    } else {
+      final long ndv;
+      if (oldEst.canMerge(newEst)) {
+        oldEst.mergeEstimators(newEst);
+        ndv = oldEst.estimateNumDistinctValues();
+        aggregateData.setNdvEstimator(oldEst);
+      } else {
+        ndv = Math.max(aggregateData.getNumDVs(), newData.getNumDVs());
+      }
+      LOG.debug("Use bitvector to merge column " + aggregateColStats.getColName() + "'s ndvs of "
+          + aggregateData.getNumDVs() + " and " + newData.getNumDVs() + " to be " + ndv);
+      aggregateData.setNumDVs(ndv);
+    }
+
+    aggregateColStats.getStatsData().setTimestampStats(aggregateData);
+  }
+
+  /** Sets the aggregate's low value to the smaller of the low values that are set, if any. */
+  public void setLowValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) {
+    if (!aggregateData.isSetLowValue() && !newData.isSetLowValue()) {
+      return;
+    }
+
+    Timestamp aggregateLowValue = aggregateData.getLowValue();
+    Timestamp newLowValue = newData.getLowValue();
+
+    Timestamp mergedLowValue = null;
+    if (aggregateData.isSetLowValue() && newData.isSetLowValue()) {
+      mergedLowValue = aggregateLowValue.compareTo(newLowValue) > 0 ? newLowValue : aggregateLowValue;
+    } else {
+      mergedLowValue = aggregateLowValue == null ? newLowValue : aggregateLowValue;
+    }
+
+    aggregateData.setLowValue(mergedLowValue);
+  }
+
+  /** Sets the aggregate's high value to the larger of the high values that are set, if any. */
+  public void setHighValue(TimestampColumnStatsDataInspector aggregateData, TimestampColumnStatsDataInspector newData) {
+    if (!aggregateData.isSetHighValue() && !newData.isSetHighValue()) {
+      return;
+    }
+
+    Timestamp aggregateHighValue = aggregateData.getHighValue();
+    Timestamp newHighValue = newData.getHighValue();
+
+    Timestamp mergedHighValue = null;
+    if (aggregateData.isSetHighValue() && newData.isSetHighValue()) {
+      mergedHighValue = aggregateHighValue.compareTo(newHighValue) > 0 ? aggregateHighValue : newHighValue;
+    } else {
+      mergedHighValue = aggregateHighValue == null ? newHighValue : aggregateHighValue;
+    }
+
+    aggregateData.setHighValue(mergedHighValue);
+  }
+}
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
index 27accc66da..102e0fdf26 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MPartitionColumnStatistics.java
@@ -224,6 +224,14 @@ public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long low
     this.longHighValue = highValue;
   }
 
+  public void setTimestampStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
+    this.numNulls = numNulls;
+    this.numDVs = numNDVs;
+    this.bitVector = bitVector;
+    this.longLowValue = lowValue;
+    this.longHighValue = highValue;
+  }
+
   public Long getLongLowValue() {
     return longLowValue;
   }
diff --git a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
index 81c3f8c1c3..2024bae366 100644
--- a/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
+++ b/standalone-metastore/metastore-server/src/main/java/org/apache/hadoop/hive/metastore/model/MTableColumnStatistics.java
@@ -214,6 +214,14 @@ public void setDateStats(Long numNulls, Long numNDVs, byte[] bitVector, Long low
     this.longHighValue = highValue;
   }
 
+  public void setTimestampStats(Long numNulls, Long numNDVs, byte[] bitVector, Long lowValue, Long highValue) {
+    this.numNulls = numNulls;
+    this.numDVs = numNDVs;
+    this.bitVector = bitVector;
+    this.longLowValue = lowValue;
+    this.longHighValue = highValue;
+  }
+
   public Long getLongLowValue() {
     return longLowValue;
   }