diff --git ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
index e1f3c3d..22861ac 100644
--- ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
+++ ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorizedRowBatchCtx.java
@@ -29,6 +29,8 @@
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.common.type.Decimal128;
 import org.apache.hadoop.hive.common.type.HiveDecimal;
@@ -63,7 +65,9 @@
  * with the partition column.
  */
 public class VectorizedRowBatchCtx {
-
+
+  private static final Log LOG = LogFactory.getLog(VectorizedRowBatchCtx.class.getName());
+
   // OI for raw row data (EG without partition cols)
   private StructObjectInspector rawRowOI;
 
@@ -223,6 +227,9 @@ public void init(Configuration hiveConf, FileSplit split) throws ClassNotFoundEx
               convert(partSpec.get(key));
           partitionTypes.put(key, TypeInfoFactory.getPrimitiveTypeInfo(partKeyTypes[i]).getPrimitiveCategory());
         }
+        if (LOG.isDebugEnabled()) {
+          LOG.debug("Partition column: name: " + key + ", value: " + objectVal + ", type: " + partitionTypes.get(key));
+        }
         partitionValues.put(key, objectVal);
         partObjectInspectors.add(objectInspector);
       }
@@ -263,7 +270,7 @@ public VectorizedRowBatch createVectorizedRowBatch() throws HiveException
       // in the included list.
       if ((colsToInclude == null) || colsToInclude.contains(j)
          || ((partitionValues != null) &&
-              (partitionValues.get(fieldRefs.get(j).getFieldName()) != null))) {
+              partitionValues.containsKey(fieldRefs.get(j).getFieldName()))) {
         ObjectInspector foi = fieldRefs.get(j).getFieldObjectInspector();
         switch (foi.getCategory()) {
         case PRIMITIVE: {
diff --git ql/src/test/queries/clientpositive/vector_non_string_partition.q ql/src/test/queries/clientpositive/vector_non_string_partition.q
new file mode 100644
index 0000000..fc1dc6d
--- /dev/null
+++ ql/src/test/queries/clientpositive/vector_non_string_partition.q
@@ -0,0 +1,17 @@
+SET hive.vectorized.execution.enabled=true;
+CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC;
+SET hive.exec.dynamic.partition.mode=nonstrict;
+SET hive.exec.dynamic.partition=true;
+
+INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint FROM alltypesorc
+WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble;
+
+SHOW PARTITIONS non_string_part;
+
+EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+
+SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10;
+
+EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
+
+SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10;
diff --git ql/src/test/results/clientpositive/vector_non_string_partition.q.out ql/src/test/results/clientpositive/vector_non_string_partition.q.out
new file mode 100644
index 0000000..7cd2625
--- /dev/null
+++ ql/src/test/results/clientpositive/vector_non_string_partition.q.out
@@ -0,0 +1,180 @@
+PREHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+POSTHOOK: query: CREATE TABLE non_string_part(cint INT, cstring1 STRING, cdouble DOUBLE, ctimestamp1 TIMESTAMP) PARTITIONED BY (ctinyint tinyint) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@non_string_part
+PREHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint FROM alltypesorc
+WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble
+PREHOOK: type: QUERY
+PREHOOK: Input: default@alltypesorc
+PREHOOK: Output: default@non_string_part
+POSTHOOK: query: INSERT OVERWRITE TABLE non_string_part PARTITION(ctinyint) SELECT cint, cstring1, cdouble, ctimestamp1, ctinyint FROM alltypesorc
+WHERE ctinyint IS NULL AND cdouble IS NOT NULL ORDER BY cdouble
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@alltypesorc
+POSTHOOK: Output: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+PREHOOK: query: SHOW PARTITIONS non_string_part
+PREHOOK: type: SHOWPARTITIONS
+PREHOOK: Input: default@non_string_part
+POSTHOOK: query: SHOW PARTITIONS non_string_part
+POSTHOOK: type: SHOWPARTITIONS
+POSTHOOK: Input: default@non_string_part
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+ctinyint=__HIVE_DEFAULT_PARTITION__
+PREHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: non_string_part
+            Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint > 0) (type: boolean)
+              Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), ctinyint (type: tinyint)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int)
+                  sort order: +
+                  Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: tinyint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_string_part
+PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint, ctinyint FROM non_string_part WHERE cint > 0 ORDER BY cint LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_string_part
+POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+762 NULL
+762 NULL
+6981 NULL
+6981 NULL
+6981 NULL
+86028 NULL
+504142 NULL
+799471 NULL
+1248059 NULL
+1286921 NULL
+PREHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 is a root stage
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: non_string_part
+            Statistics: Num rows: 3073 Data size: 339150 Basic stats: COMPLETE Column stats: NONE
+            Filter Operator
+              predicate: (cint > 0) (type: boolean)
+              Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+              Select Operator
+                expressions: cint (type: int), cstring1 (type: string)
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: int), _col1 (type: string)
+                  sort order: ++
+                  Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col0 (type: int), _col1 (type: string)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Extract
+          Statistics: Num rows: 1024 Data size: 113013 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 10
+            Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 10 Data size: 1100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 10
+
+PREHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+PREHOOK: type: QUERY
+PREHOOK: Input: default@non_string_part
+PREHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cint, cstring1 FROM non_string_part WHERE cint > 0 ORDER BY cint, cstring1 LIMIT 10
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@non_string_part
+POSTHOOK: Input: default@non_string_part@ctinyint=__HIVE_DEFAULT_PARTITION__
+#### A masked pattern was here ####
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cdouble SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cint SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).cstring1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: non_string_part PARTITION(ctinyint=__HIVE_DEFAULT_PARTITION__).ctimestamp1 SIMPLE [(alltypesorc)alltypesorc.FieldSchema(name:ctimestamp1, type:timestamp, comment:null), ]
+762 3WsVeqb28VWEEOLI8ail
+762 40ks5556SV
+6981 1FNNhmiFLGw425NA13g
+6981 o5mb0QP5Y48Qd4vdB0
+6981 sF2CRfgt2K
+86028 T2o8XRFAL0HC4ikDQnfoCymw
+504142 PlOxor04p5cvVl
+799471 2fu24
+1248059 Uhps6mMh3IfHB3j7yH62K
+1286921 ODLrXI8882q8LS8