diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
index bb066af..8dc9df0 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/serde/ArrayWritableObjectInspector.java
@@ -106,10 +106,9 @@ private ObjectInspector getObjectInspector(final TypeInfo typeInfo) {
       return PrimitiveObjectInspectorFactory.writableBinaryObjectInspector;
     }else if (typeInfo.equals(TypeInfoFactory.dateTypeInfo)) {
       throw new UnsupportedOperationException("Parquet does not support date. See HIVE-6384");
-    } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME)) {
-      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((CharTypeInfo) typeInfo);
-    } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
-      return PrimitiveObjectInspectorFactory.getPrimitiveWritableObjectInspector((VarcharTypeInfo) typeInfo);
+    } else if (typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.CHAR_TYPE_NAME) ||
+        typeInfo.getTypeName().toLowerCase().startsWith(serdeConstants.VARCHAR_TYPE_NAME)) {
+      return PrimitiveObjectInspectorFactory.writableStringObjectInspector;
     } else {
       throw new UnsupportedOperationException("Unknown field type: " + typeInfo);
     }
diff --git ql/src/test/queries/clientpositive/groupby_par_char.q ql/src/test/queries/clientpositive/groupby_par_char.q
new file mode 100644
index 0000000..03e9cb9
--- /dev/null
+++ ql/src/test/queries/clientpositive/groupby_par_char.q
@@ -0,0 +1,49 @@
+SET hive.vectorized.execution.enabled=false;
+drop table char_2;
+
+create table char_2 (
+  key char(10),
+  value char(20)
+) stored as parquet;
+
+insert overwrite table char_2 select * from src;
+
+select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5;
+
+explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5;
+
+-- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5;
+
+select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5;
+
+explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5;
+
+-- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5;
+
+drop table char_2;
\ No newline at end of file
diff --git ql/src/test/results/clientpositive/groupby_par_char.q.out ql/src/test/results/clientpositive/groupby_par_char.q.out
new file mode 100644
index 0000000..8c3a0fe
--- /dev/null
+++ ql/src/test/results/clientpositive/groupby_par_char.q.out
@@ -0,0 +1,294 @@
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as parquet
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@char_2
+POSTHOOK: query: create table char_2 (
+  key char(10),
+  value char(20)
+) stored as parquet
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@char_2
+PREHOOK: query: insert overwrite table char_2 select * from src
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+PREHOOK: Output: default@char_2
+POSTHOOK: query: insert overwrite table char_2 select * from src
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+POSTHOOK: Output: default@char_2
+POSTHOOK: Lineage: char_2.key SIMPLE [(src)src.FieldSchema(name:key, type:string, comment:default), ]
+POSTHOOK: Lineage: char_2.value SIMPLE [(src)src.FieldSchema(name:value, type:string, comment:default), ]
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_0	0	3
+val_10	10	1
+val_100	200	2
+val_103	206	2
+val_104	208	2
+PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), UDFToInteger(key) (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1), count()
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 500 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 250 Data size: 500 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 500 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value asc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_0	0	3
+val_10	10	1
+val_100	200	2
+val_103	206	2
+val_104	208	2
+PREHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: select value, sum(cast(key as int)), count(*) numrows
+from src
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+val_98	196	2
+val_97	194	2
+val_96	96	1
+val_95	190	2
+val_92	92	1
+PREHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+POSTHOOK: query: explain select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: char_2
+            Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: value (type: string), UDFToInteger(key) (type: int)
+              outputColumnNames: _col0, _col1
+              Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: sum(_col1), count()
+                keys: _col0 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2
+                Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 500 Data size: 1000 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: sum(VALUE._col0), count(VALUE._col1)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 500 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: -
+              Statistics: Num rows: 250 Data size: 500 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint), _col2 (type: bigint)
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint), VALUE._col1 (type: bigint)
+          outputColumnNames: _col0, _col1, _col2
+          Statistics: Num rows: 250 Data size: 500 Basic stats: COMPLETE Column stats: NONE
+          Limit
+            Number of rows: 5
+            Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 5 Data size: 10 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.TextInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: 5
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+PREHOOK: type: QUERY
+PREHOOK: Input: default@char_2
+#### A masked pattern was here ####
+POSTHOOK: query: -- should match the query from src
+select value, sum(cast(key as int)), count(*) numrows
+from char_2
+group by value
+order by value desc
+limit 5
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@char_2
+#### A masked pattern was here ####
+val_98	196	2
+val_97	194	2
+val_96	96	1
+val_95	190	2
+val_92	92	1
+PREHOOK: query: drop table char_2
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@char_2
+PREHOOK: Output: default@char_2
+POSTHOOK: query: drop table char_2
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@char_2
+POSTHOOK: Output: default@char_2
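
Note on the fix: the rewritten branch in getObjectInspector collapses the separate char and varchar cases into one prefix test and returns the plain writableStringObjectInspector for both. The sketch below is a minimal, hypothetical standalone check (not part of the patch; the class name CharInspectorSketch and its main driver are made up for illustration) that exercises the same predicate against parsed TypeInfos, assuming Hive's serde and serde2 classes are on the classpath:

import org.apache.hadoop.hive.serde.serdeConstants;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;

// Hypothetical standalone check, not part of the patch.
public class CharInspectorSketch {
  public static void main(String[] args) {
    for (String type : new String[] {"char(10)", "varchar(20)"}) {
      // Parse the type string into a TypeInfo ("char(10)" -> a CharTypeInfo, etc.).
      TypeInfo typeInfo = TypeInfoUtils.getTypeInfoFromTypeString(type);
      // Same prefix test as the patched branch: "char(10)" and "varchar(20)"
      // both start with their base type names "char" / "varchar".
      String name = typeInfo.getTypeName().toLowerCase();
      boolean handledAsString = name.startsWith(serdeConstants.CHAR_TYPE_NAME)
          || name.startsWith(serdeConstants.VARCHAR_TYPE_NAME);
      // The patched branch returns the plain string inspector for both cases.
      ObjectInspector oi = PrimitiveObjectInspectorFactory.writableStringObjectInspector;
      System.out.println(type + " -> handledAsString=" + handledAsString
          + ", inspector=" + oi.getTypeName());
    }
  }
}

Because the returned inspector is the generic string one rather than a length-aware char/varchar inspector, padding or truncation to the declared length is not applied at the inspector level; the group-by results in the new q.out above confirm only that char columns read from Parquet compare and aggregate the same as their string source values.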