diff --git ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
index 7a482d968f..7cedab62fa 100644
--- ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
+++ ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/translator/RexNodeConverter.java
@@ -17,18 +17,10 @@
  */
 package org.apache.hadoop.hive.ql.optimizer.calcite.translator;
 
-import java.math.BigDecimal;
-import java.math.BigInteger;
-import java.sql.Timestamp;
-import java.time.Instant;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.LinkedHashMap;
-import java.util.List;
-import java.util.Locale;
-import java.util.Map;
-
+import com.google.common.collect.ImmutableList;
+import com.google.common.collect.ImmutableList.Builder;
+import com.google.common.collect.ImmutableMap;
+import com.google.common.collect.Iterables;
 import org.apache.calcite.avatica.util.TimeUnit;
 import org.apache.calcite.avatica.util.TimeUnitRange;
 import org.apache.calcite.plan.RelOptCluster;
@@ -49,6 +41,7 @@
 import org.apache.calcite.sql.fun.SqlStdOperatorTable;
 import org.apache.calcite.sql.parser.SqlParserPos;
 import org.apache.calcite.sql.type.SqlTypeName;
+import org.apache.calcite.sql.type.SqlTypeUtil;
 import org.apache.calcite.util.ConversionUtil;
 import org.apache.calcite.util.DateString;
 import org.apache.calcite.util.NlsString;
@@ -104,9 +97,17 @@
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
 import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
 
-import com.google.common.collect.ImmutableList;
-import com.google.common.collect.ImmutableList.Builder;
-import com.google.common.collect.ImmutableMap;
+import java.math.BigDecimal;
+import java.math.BigInteger;
+import java.sql.Timestamp;
+import java.time.Instant;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.LinkedHashMap;
+import java.util.List;
+import java.util.Locale;
+import java.util.Map;
 
 public class RexNodeConverter {
 
@@ -455,26 +456,50 @@ private RexNode handleExplicitCast(ExprNodeGenericFuncDesc func, List c
   private List<RexNode> rewriteExtractDateChildren(SqlOperator op, List<RexNode> childRexNodeLst)
       throws SemanticException {
-    List<RexNode> newChildRexNodeLst = new ArrayList<RexNode>();
+    List<RexNode> newChildRexNodeLst = new ArrayList<>(2);
+    final boolean isTimestampLevel;
     if (op == HiveExtractDate.YEAR) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.YEAR));
+      isTimestampLevel = false;
     } else if (op == HiveExtractDate.QUARTER) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.QUARTER));
+      isTimestampLevel = false;
     } else if (op == HiveExtractDate.MONTH) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MONTH));
+      isTimestampLevel = false;
     } else if (op == HiveExtractDate.WEEK) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.WEEK));
+      isTimestampLevel = false;
     } else if (op == HiveExtractDate.DAY) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.DAY));
+      isTimestampLevel = false;
     } else if (op == HiveExtractDate.HOUR) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.HOUR));
+      isTimestampLevel = true;
     } else if (op == HiveExtractDate.MINUTE) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.MINUTE));
+      isTimestampLevel = true;
    } else if (op == HiveExtractDate.SECOND) {
       newChildRexNodeLst.add(cluster.getRexBuilder().makeFlag(TimeUnitRange.SECOND));
+      isTimestampLevel = true;
+    } else {
+      isTimestampLevel = false;
     }
-    assert childRexNodeLst.size() == 1;
-    newChildRexNodeLst.add(childRexNodeLst.get(0));
+
+    final RexNode child = Iterables.getOnlyElement(childRexNodeLst);
+    if (SqlTypeUtil.isDatetime(child.getType()) || SqlTypeUtil.isInterval(child.getType())) {
+      newChildRexNodeLst.add(child);
+    } else {
+      // The child is not a datetime/interval type: cast it to the DATETIME family (TIMESTAMP for sub-day fields, DATE otherwise)
+      if (isTimestampLevel) {
+        newChildRexNodeLst.add(
+            cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.TIMESTAMP), child));
+      } else {
+        newChildRexNodeLst.add(
+            cluster.getRexBuilder().makeCast(cluster.getTypeFactory().createSqlType(SqlTypeName.DATE), child));
+      }
+    }
+
     return newChildRexNodeLst;
   }
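[Review note] The hunk above derives the cast target from the extract granularity: HOUR, MINUTE and SECOND need time-of-day precision, so a non-datetime child is cast to TIMESTAMP, while YEAR through DAY only need a calendar date and get a DATE cast. A minimal standalone sketch of that decision, with a hypothetical helper name castTargetFor that is not part of the patch:

    import org.apache.calcite.avatica.util.TimeUnitRange;
    import org.apache.calcite.sql.type.SqlTypeName;

    final class ExtractCastSketch {
      // Mirrors the isTimestampLevel flag introduced above: sub-day extract
      // fields cast to TIMESTAMP, day-level and coarser fields cast to DATE.
      static SqlTypeName castTargetFor(TimeUnitRange unit) {
        switch (unit) {
          case HOUR:
          case MINUTE:
          case SECOND:
            return SqlTypeName.TIMESTAMP;
          default:
            return SqlTypeName.DATE;
        }
      }
    }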
diff --git ql/src/test/queries/clientpositive/druidmini_extractTime.q ql/src/test/queries/clientpositive/druidmini_extractTime.q
index 429f7965c2..95413612ea 100644
--- ql/src/test/queries/clientpositive/druidmini_extractTime.q
+++ ql/src/test/queries/clientpositive/druidmini_extractTime.q
@@ -176,4 +176,27 @@ SELECT CAST(`__time` AS DATE) AS `x_date`, SUM(cfloat) FROM druid_table GROUP B
 
 SELECT CAST(`__time` AS DATE) AS `x_date` FROM druid_table ORDER BY `x_date` LIMIT 5;
 
+-- Test EXTRACT from a non-datetime column
+
+create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double);
+insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0);
+
+CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table;
+
+explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table;
+
+select year(date_c), month(date_c), day(date_c),
+year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table;
+
+DROP TABLE druid_test_extract_from_string_table;
+DROP TABLE test_extract_from_string_base_table;
 DROP TABLE druid_table;
\ No newline at end of file
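[Review note] The new q-file exercises extract functions over string columns in a Druid-backed table. As a sanity check on the single inserted row, the expected values can be reproduced with plain java.time; this sketch is illustrative only, not part of the patch or the test harness:

    import java.time.LocalDate;
    import java.time.LocalDateTime;

    class ExtractExpectationSketch {
      public static void main(String[] args) {
        LocalDate d = LocalDate.parse("2015-03-10");                    // date_c
        LocalDateTime ts = LocalDateTime.parse("2015-03-08T05:30:20");  // timestamp_c
        // Should print the row the .q.out below expects: 2015 3 10 2015 3 8 5 30 20
        System.out.printf("%d %d %d %d %d %d %d %d %d%n",
            d.getYear(), d.getMonthValue(), d.getDayOfMonth(),
            ts.getYear(), ts.getMonthValue(), ts.getDayOfMonth(),
            ts.getHour(), ts.getMinute(), ts.getSecond());
      }
    }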
diff --git ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
index a1b1d24ef2..672d69398f 100644
--- ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
+++ ql/src/test/results/clientpositive/druid/druidmini_extractTime.q.out
@@ -1089,6 +1089,109 @@ POSTHOOK: Output: hdfs://### HDFS PATH ###
 1969-12-31
 1969-12-31
 1969-12-31
+PREHOOK: query: create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double)
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: create table test_extract_from_string_base_table(`timecolumn` timestamp, `date_c` string, `timestamp_c` string, `metric_c` double)
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_extract_from_string_base_table
+PREHOOK: query: insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: query: insert into test_extract_from_string_base_table values ('2015-03-08 00:00:00', '2015-03-10', '2015-03-08 05:30:20', 5.0)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_extract_from_string_base_table
+POSTHOOK: Lineage: test_extract_from_string_base_table.date_c SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.metric_c SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.timecolumn SCRIPT []
+POSTHOOK: Lineage: test_extract_from_string_base_table.timestamp_c SCRIPT []
+PREHOOK: query: CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table
+PREHOOK: type: CREATETABLE_AS_SELECT
+PREHOOK: Input: default@test_extract_from_string_base_table
+PREHOOK: Output: database:default
+PREHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: query: CREATE TABLE druid_test_extract_from_string_table
+STORED BY 'org.apache.hadoop.hive.druid.DruidStorageHandler'
+TBLPROPERTIES ("druid.segment.granularity" = "DAY")
+AS select
+cast(`timecolumn` as timestamp with local time zone) as `__time`, `date_c`, `timestamp_c`, `metric_c`
+FROM test_extract_from_string_base_table
+POSTHOOK: type: CREATETABLE_AS_SELECT
+POSTHOOK: Input: default@test_extract_from_string_base_table
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@druid_test_extract_from_string_table
+POSTHOOK: Lineage: druid_test_extract_from_string_table.__time EXPRESSION [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:timecolumn, type:timestamp, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.date_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:date_c, type:string, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.metric_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:metric_c, type:double, comment:null), ]
+POSTHOOK: Lineage: druid_test_extract_from_string_table.timestamp_c SIMPLE [(test_extract_from_string_base_table)test_extract_from_string_base_table.FieldSchema(name:timestamp_c, type:string, comment:null), ]
+PREHOOK: query: explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select
+year(date_c), month(date_c),day(date_c),
+year(timestamp_c), month(timestamp_c),day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c)
+from druid_test_extract_from_string_table
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        TableScan
+          alias: druid_test_extract_from_string_table
+          properties:
+            druid.fieldNames vc,vc0,vc1,vc2,vc3,vc4,vc5,vc6,vc7
+            druid.fieldTypes int,int,int,int,int,int,int,int,int
druid.query.json {"queryType":"scan","dataSource":"default.druid_test_extract_from_string_table","intervals":["1900-01-01T00:00:00.000Z/3000-01-01T00:00:00.000Z"],"virtualColumns":[{"type":"expression","name":"vc","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc0","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc1","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"date_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc2","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'YEAR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc3","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'MONTH','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc4","expression":"timestamp_extract(timestamp_floor(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'P1D','','US/Pacific'),'DAY','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc5","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'HOUR','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc6","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'MINUTE','US/Pacific')","outputType":"LONG"},{"type":"expression","name":"vc7","expression":"timestamp_extract(timestamp_parse(\"timestamp_c\",'','US/Pacific'),'SECOND','US/Pacific')","outputType":"LONG"}],"columns":["vc","vc0","vc1","vc2","vc3","vc4","vc5","vc6","vc7"],"resultFormat":"compactedList"} + druid.query.type scan + Select Operator + expressions: vc (type: int), vc0 (type: int), vc1 (type: int), vc2 (type: int), vc3 (type: int), vc4 (type: int), vc5 (type: int), vc6 (type: int), vc7 (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8 + ListSink + +PREHOOK: query: select year(date_c), month(date_c), day(date_c), +year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c) +from druid_test_extract_from_string_table +PREHOOK: type: QUERY +PREHOOK: Input: default@druid_test_extract_from_string_table +PREHOOK: Output: hdfs://### HDFS PATH ### +POSTHOOK: query: select year(date_c), month(date_c), day(date_c), +year(timestamp_c), month(timestamp_c), day(timestamp_c), hour(timestamp_c), minute (timestamp_c), second (timestamp_c) +from druid_test_extract_from_string_table +POSTHOOK: type: QUERY +POSTHOOK: Input: default@druid_test_extract_from_string_table +POSTHOOK: Output: hdfs://### HDFS PATH ### +2015 3 10 2015 3 8 5 30 20 +PREHOOK: query: DROP TABLE druid_test_extract_from_string_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: default@druid_test_extract_from_string_table +PREHOOK: Output: default@druid_test_extract_from_string_table +POSTHOOK: query: DROP TABLE druid_test_extract_from_string_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@druid_test_extract_from_string_table +POSTHOOK: Output: default@druid_test_extract_from_string_table +PREHOOK: query: DROP TABLE test_extract_from_string_base_table +PREHOOK: type: DROPTABLE +PREHOOK: Input: 
default@test_extract_from_string_base_table +PREHOOK: Output: default@test_extract_from_string_base_table +POSTHOOK: query: DROP TABLE test_extract_from_string_base_table +POSTHOOK: type: DROPTABLE +POSTHOOK: Input: default@test_extract_from_string_base_table +POSTHOOK: Output: default@test_extract_from_string_base_table PREHOOK: query: DROP TABLE druid_table PREHOOK: type: DROPTABLE PREHOOK: Input: default@druid_table diff --git ql/src/test/results/clientpositive/udf_hour.q.out ql/src/test/results/clientpositive/udf_hour.q.out index d26d71f323..318a98a46d 100644 --- ql/src/test/results/clientpositive/udf_hour.q.out +++ ql/src/test/results/clientpositive/udf_hour.q.out @@ -41,7 +41,7 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 86.0D) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 13 (type: int), 13 (type: int), null (type: int) + expressions: 13 (type: int), null (type: int), 0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE ListSink @@ -56,7 +56,7 @@ FROM src WHERE key = 86 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -13 13 NULL +13 NULL 0 PREHOOK: query: SELECT hour(cast('2009-08-07 13:14:15' as timestamp)) FROM src WHERE key=86 PREHOOK: type: QUERY diff --git ql/src/test/results/clientpositive/udf_minute.q.out ql/src/test/results/clientpositive/udf_minute.q.out index ababcca053..02a5c8d400 100644 --- ql/src/test/results/clientpositive/udf_minute.q.out +++ ql/src/test/results/clientpositive/udf_minute.q.out @@ -41,7 +41,7 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 86.0D) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 14 (type: int), 14 (type: int), null (type: int) + expressions: 14 (type: int), null (type: int), 0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE File Output Operator @@ -69,4 +69,4 @@ FROM src WHERE key = 86 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -14 14 NULL +14 NULL 0 diff --git ql/src/test/results/clientpositive/udf_second.q.out ql/src/test/results/clientpositive/udf_second.q.out index 8042a30ab4..6cc7775519 100644 --- ql/src/test/results/clientpositive/udf_second.q.out +++ ql/src/test/results/clientpositive/udf_second.q.out @@ -41,7 +41,7 @@ STAGE PLANS: predicate: (UDFToDouble(key) = 86.0D) (type: boolean) Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE Select Operator - expressions: 15 (type: int), 15 (type: int), null (type: int) + expressions: 15 (type: int), null (type: int), 0 (type: int) outputColumnNames: _col0, _col1, _col2 Statistics: Num rows: 250 Data size: 2656 Basic stats: COMPLETE Column stats: NONE ListSink @@ -56,4 +56,4 @@ FROM src WHERE key = 86 POSTHOOK: type: QUERY POSTHOOK: Input: default@src #### A masked pattern was here #### -15 15 NULL +15 NULL 0 diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out index 9f90e8262e..fb49a9b6f0 100644 --- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out +++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out @@ -450,13 +450,13 @@ STAGE PLANS: TableScan Vectorization: native: true Select Operator - expressions: 
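[Review note] The udf_hour, udf_minute and udf_second golden-file changes above share one cause: those functions now see CAST(str AS TIMESTAMP) rather than positional string field extraction, so a bare time such as '13:14:15' no longer parses (NULL) while a bare date such as '2009-08-07' gains a 00:00:00 time part (hour 0). A rough java.time illustration of that cast behavior; hourViaTimestampCast is a hypothetical helper, not Hive code:

    import java.time.LocalDate;
    import java.time.LocalDateTime;
    import java.time.format.DateTimeParseException;

    final class HourCastSketch {
      // Approximates CAST(string AS TIMESTAMP) followed by hour(): full
      // timestamps parse, bare dates get midnight, bare times fail to parse.
      static Integer hourViaTimestampCast(String s) {
        try {
          return LocalDateTime.parse(s.replace(' ', 'T')).getHour();  // '2009-08-07 13:14:15' -> 13
        } catch (DateTimeParseException e) {
          try {
            return LocalDate.parse(s).atStartOfDay().getHour();       // '2009-08-07' -> 0
          } catch (DateTimeParseException e2) {
            return null;                                              // '13:14:15' -> null (NULL)
          }
        }
      }
    }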
diff --git ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
index 9f90e8262e..fb49a9b6f0 100644
--- ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
+++ ql/src/test/results/clientpositive/vectorized_timestamp_funcs.q.out
@@ -450,13 +450,13 @@ STAGE PLANS:
                 TableScan Vectorization:
                     native: true
                 Select Operator
-                  expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+                  expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [5, 6, 7, 8, 9, 10, 11, 12, 13]
-                      selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 7:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 8:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 9:int, VectorUDFWeekOfYearString(col 2:string) -> 10:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 11:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 12:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 13:int
+                      projectedOutputColumnNums: [5, 7, 8, 9, 6, 11, 10, 13, 14]
+                      selectExpressions: VectorUDFUnixTimeStampString(col 2:string) -> 5:bigint, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 7:int, VectorUDFMonthDate(col 6, field MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int, VectorUDFDayOfMonthDate(col 6, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 6:date) -> 9:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 11:int, VectorUDFHourTimestamp(col 12:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 10:int, VectorUDFMinuteTimestamp(col 12:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 13:int, VectorUDFSecondTimestamp(col 12:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 12:timestamp) -> 14:int
                   Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: bigint)
@@ -476,7 +476,7 @@ STAGE PLANS:
             featureSupportInUse: []
             inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
             allNative: false
-            usesVectorUDFAdaptor: false
+            usesVectorUDFAdaptor: true
             vectorized: true
       Reduce Vectorization:
           enabled: false
@@ -627,13 +627,13 @@ STAGE PLANS:
                 TableScan Vectorization:
                     native: true
                 Select Operator
-                  expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(stimestamp1)) (type: boolean), (month(ctimestamp1) = month(stimestamp1)) (type: boolean), (day(ctimestamp1) = day(stimestamp1)) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(stimestamp1)) (type: boolean), (hour(ctimestamp1) = hour(stimestamp1)) (type: boolean), (minute(ctimestamp1) = minute(stimestamp1)) (type: boolean), (second(ctimestamp1) = second(stimestamp1)) (type: boolean)
+                  expressions: (to_unix_timestamp(ctimestamp1) = to_unix_timestamp(stimestamp1)) (type: boolean), (year(ctimestamp1) = year(CAST( stimestamp1 AS DATE))) (type: boolean), (month(ctimestamp1) = month(CAST( stimestamp1 AS DATE))) (type: boolean), (day(ctimestamp1) = day(CAST( stimestamp1 AS DATE))) (type: boolean), (dayofmonth(ctimestamp1) = dayofmonth(stimestamp1)) (type: boolean), (weekofyear(ctimestamp1) = weekofyear(CAST( stimestamp1 AS DATE))) (type: boolean), (hour(ctimestamp1) = hour(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (minute(ctimestamp1) = minute(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean), (second(ctimestamp1) = second(CAST( stimestamp1 AS TIMESTAMP))) (type: boolean)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [7, 8, 9, 10, 11, 12, 13, 14, 15]
-                      selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearString(col 2:string, fieldStart 0, fieldLength 4) -> 6:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthString(col 2:string, fieldStart 5, fieldLength 2) -> 6:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 6:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearString(col 2:string) -> 6:int) -> 12:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourString(col 2:string, fieldStart 11, fieldLength 2) -> 6:int) -> 13:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteString(col 2:string, fieldStart 14, fieldLength 2) -> 6:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 6:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondString(col 2:string, fieldStart 17, fieldLength 2) -> 6:int) -> 15:boolean
+                      projectedOutputColumnNums: [7, 6, 8, 9, 11, 10, 14, 15, 16]
+                      selectExpressions: LongColEqualLongColumn(col 5:bigint, col 6:bigint)(children: VectorUDFUnixTimeStampTimestamp(col 1:timestamp) -> 5:bigint, VectorUDFUnixTimeStampString(col 2:string) -> 6:bigint) -> 7:boolean, LongColEqualLongColumn(col 5:int, col 8:int)(children: VectorUDFYearTimestamp(col 1:timestamp, field YEAR) -> 5:int, VectorUDFYearDate(col 6, field YEAR)(children: CastStringToDate(col 2:string) -> 6:date) -> 8:int) -> 6:boolean, LongColEqualLongColumn(col 5:int, col 9:int)(children: VectorUDFMonthTimestamp(col 1:timestamp, field MONTH) -> 5:int, VectorUDFMonthDate(col 8, field MONTH)(children: CastStringToDate(col 2:string) -> 8:date) -> 9:int) -> 8:boolean, LongColEqualLongColumn(col 5:int, col 10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthDate(col 9, field DAY_OF_MONTH)(children: CastStringToDate(col 2:string) -> 9:date) -> 10:int) -> 9:boolean, LongColEqualLongColumn(col 5:int, col 10:int)(children: VectorUDFDayOfMonthTimestamp(col 1:timestamp, field DAY_OF_MONTH) -> 5:int, VectorUDFDayOfMonthString(col 2:string, fieldStart 8, fieldLength 2) -> 10:int) -> 11:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFWeekOfYearTimestamp(col 1:timestamp, field WEEK_OF_YEAR) -> 5:int, VectorUDFWeekOfYearDate(col 10, field WEEK_OF_YEAR)(children: CastStringToDate(col 2:string) -> 10:date) -> 12:int) -> 10:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFHourTimestamp(col 1:timestamp, field HOUR_OF_DAY) -> 5:int, VectorUDFHourTimestamp(col 13:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 14:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFMinuteTimestamp(col 1:timestamp, field MINUTE) -> 5:int, VectorUDFMinuteTimestamp(col 13:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 15:boolean, LongColEqualLongColumn(col 5:int, col 12:int)(children: VectorUDFSecondTimestamp(col 1:timestamp, field SECOND) -> 5:int, VectorUDFSecondTimestamp(col 13:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 13:timestamp) -> 12:int) -> 16:boolean
                   Statistics: Num rows: 52 Data size: 3179 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: boolean)
@@ -653,7 +653,7 @@ STAGE PLANS:
             featureSupportInUse: []
             inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
             allNative: false
-            usesVectorUDFAdaptor: false
+            usesVectorUDFAdaptor: true
             vectorized: true
       Reduce Vectorization:
          enabled: false
@@ -804,13 +804,13 @@ STAGE PLANS:
                 TableScan Vectorization:
                     native: true
                 Select Operator
-                  expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(stimestamp1) (type: int), month(stimestamp1) (type: int), day(stimestamp1) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(stimestamp1) (type: int), hour(stimestamp1) (type: int), minute(stimestamp1) (type: int), second(stimestamp1) (type: int)
+                  expressions: to_unix_timestamp(stimestamp1) (type: bigint), year(CAST( stimestamp1 AS DATE)) (type: int), month(CAST( stimestamp1 AS DATE)) (type: int), day(CAST( stimestamp1 AS DATE)) (type: int), dayofmonth(stimestamp1) (type: int), weekofyear(CAST( stimestamp1 AS DATE)) (type: int), hour(CAST( stimestamp1 AS TIMESTAMP)) (type: int), minute(CAST( stimestamp1 AS TIMESTAMP)) (type: int), second(CAST( stimestamp1 AS TIMESTAMP)) (type: int)
                   outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8
                   Select Vectorization:
                       className: VectorSelectOperator
                       native: true
-                      projectedOutputColumnNums: [2, 3, 4, 5, 6, 7, 8, 9, 10]
-                      selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearString(col 0:string, fieldStart 0, fieldLength 4) -> 3:int, VectorUDFMonthString(col 0:string, fieldStart 5, fieldLength 2) -> 4:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 5:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 6:int, VectorUDFWeekOfYearString(col 0:string) -> 7:int, VectorUDFHourString(col 0:string, fieldStart 11, fieldLength 2) -> 8:int, VectorUDFMinuteString(col 0:string, fieldStart 14, fieldLength 2) -> 9:int, VectorUDFSecondString(col 0:string, fieldStart 17, fieldLength 2) -> 10:int
+                      projectedOutputColumnNums: [2, 4, 5, 6, 3, 8, 7, 10, 11]
+                      selectExpressions: VectorUDFUnixTimeStampString(col 0:string) -> 2:bigint, VectorUDFYearDate(col 3, field YEAR)(children: CastStringToDate(col 0:string) -> 3:date) -> 4:int, VectorUDFMonthDate(col 3, field MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 5:int, VectorUDFDayOfMonthDate(col 3, field DAY_OF_MONTH)(children: CastStringToDate(col 0:string) -> 3:date) -> 6:int, VectorUDFDayOfMonthString(col 0:string, fieldStart 8, fieldLength 2) -> 3:int, VectorUDFWeekOfYearDate(col 7, field WEEK_OF_YEAR)(children: CastStringToDate(col 0:string) -> 7:date) -> 8:int, VectorUDFHourTimestamp(col 9:timestamp, field HOUR_OF_DAY)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 7:int, VectorUDFMinuteTimestamp(col 9:timestamp, field MINUTE)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 10:int, VectorUDFSecondTimestamp(col 9:timestamp, field SECOND)(children: VectorUDFAdaptor(CAST( stimestamp1 AS TIMESTAMP)) -> 9:timestamp) -> 11:int
                   Statistics: Num rows: 3 Data size: 294 Basic stats: COMPLETE Column stats: NONE
                   Reduce Output Operator
                     key expressions: _col0 (type: bigint)
@@ -830,7 +830,7 @@ STAGE PLANS:
             featureSupportInUse: []
             inputFileFormats: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
             allNative: false
-            usesVectorUDFAdaptor: false
+            usesVectorUDFAdaptor: true
             vectorized: true
       Reduce Vectorization:
          enabled: false
@@ -885,7 +885,7 @@ ORDER BY c1
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@alltypesorc_wrong
 #### A masked pattern was here ####
-NULL NULL NULL NULL NULL NULL NULL NULL NULL
+NULL 2 11 30 NULL 48 NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 NULL NULL NULL NULL NULL NULL NULL NULL NULL
 PREHOOK: query: EXPLAIN VECTORIZATION EXPRESSION SELECT