diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java index e1f3c3d..22861ac 100644 --- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java +++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java @@ -29,6 +29,8 @@ import java.util.regex.Matcher; import java.util.regex.Pattern; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.common.type.Decimal128; import org.apache.hadoop.hive.common.type.HiveDecimal; @@ -63,7 +65,9 @@ * with the partition column. */ public class VectorizedRowBatchCtx { - + + private static final Log LOG = LogFactory.getLog(VectorizedRowBatchCtx.class.getName()); + // OI for raw row data (EG without partition cols) private StructObjectInspector rawRowOI; @@ -223,6 +227,9 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx convert(partSpec.get(key)); partitionTypes.put(key, TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]).getPrimitiveCategory()); } + if (LOG.isDebugEnabled()) { + LOG.debug("Partition column: name: " + key + ", value: " + objectVal + ", type: " + partitionTypes.get(key)); + } partitionValues.put(key, objectVal); partObjectInspectors.add(objectInspector); } @@ -263,7 +270,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException // in the included list. if ((colsToInclude == null) || colsToInclude.contains(j) || ((partitionValues != null) && - (partitionValues.get(fieldRefs.get(j).getFieldName()) != null))) { + partitionValues.containsKey(fieldRefs.get(j).getFieldName()))) { ObjectInspector foi = fieldRefs.get(j).getFieldObjectInspector(); switch (foi.getCategory()) { case PRIMITIVE: { diff --git ql/src/test/queries/clientpositive/vector_non_string_partition.q ql/src/test/queries/clientpositive/vector_non_string_partition.q new file mode 100644 index 0000000..e86f77c --- /dev/null +++ ql/src/test/queries/clientpositive/vector_non_string_partition.q @@ -0,0 +1,16 @@ +SET hive.vectorized.execution.enabled=true; +CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC; +SET hive.exec.dynamic.partition.mode=nonstrict; +SET hive.exec.dynamic.partition=true; + +INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc WHERE ctinyint IS NULL; + +SHOW PARTITIONS non_string_part; + +EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 LIMIT 10; + +SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 LIMIT 10; + +EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 LIMIT 10; + +SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 LIMIT 10; diff --git ql/src/test/results/clientpositive/vector_non_string_partition.q.out ql/src/test/results/clientpositive/vector_non_string_partition.q.out new file mode 100644 index 0000000..1552fe4 --- /dev/null +++ ql/src/test/results/clientpositive/vector_non_string_partition.q.out @@ -0,0 +1,162 @@ +PREHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +POSTHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@non_string_part +PREHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc WHERE ctinyint IS NULL +PREHOOK: type: QUERY +PREHOOK: Input: default@alltypesorc +PREHOOK: Output: default@non_string_part +POSTHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint fROM alltypesorc WHERE ctinyint IS NULL +POSTHOOK: type: QUERY +POSTHOOK: Input: default@alltypesorc +POSTHOOK: Output: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +PREHOOK: query: SHOW PARTITIONS non_string_part +PREHOOK: type: SHOWPARTITIONS +PREHOOK: Input: default@non_string_part +POSTHOOK: query: SHOW PARTITIONS non_string_part +POSTHOOK: type: SHOWPARTITIONS +POSTHOOK: Input: default@non_string_part +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +ctinyint=__HIVE_DEFAULT_PARTITION__ +PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: non_string_part + Statistics: Num rows: 3115 Data size: 343434 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint > 0) (type: boolean) + Statistics: Num rows: 1038 Data size: 114441 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1038 Data size: 114441 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 10 + +PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@non_string_part +PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_string_part +POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +250815419 NULL +428844835 NULL +332314412 NULL +180909333 NULL +532048781 NULL +246423894 NULL +430668873 NULL +955691407 NULL +84404564 NULL +690559558 NULL +PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 LIMIT 10 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 is a root stage + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: non_string_part + Statistics: Num rows: 3115 Data size: 343434 Basic stats: COMPLETE Column stats: NONE + Filter Operator + predicate: (cint > 0) (type: boolean) + Statistics: Num rows: 1038 Data size: 114441 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), cstring1 (type: string) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 1038 Data size: 114441 Basic stats: COMPLETE Column stats: NONE + Limit + Number of rows: 10 + Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + Execution mode: vectorized + + Stage: Stage-0 + Fetch Operator + limit: 10 + +PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 LIMIT 10 +PREHOOK: type: QUERY +PREHOOK: Input: default@non_string_part +PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 LIMIT 10 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@non_string_part +POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__ +#### A masked pattern was here #### +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ] +250815419 11F2M +428844835 3c4ER4QtMJwx83mT5Xp +332314412 k01Ir4eR2jd +180909333 Kamb1E +532048781 64xc3K542PGU2l2 +246423894 Q1JAdUlCVORmR0Q5X5Vf5u6 +430668873 yy2GiGM +955691407 fv6s5tGQJO45BvV4m8C +84404564 X7vKpt286BLxBIgQ +690559558 tphLsg0p