commit 1e96c29ef8d1617810d1dd238c146622a247a0da Author: Vihang Karajgaonkar Date: Fri Jun 24 14:12:37 2016 -0700 HIVE-14085 : Allow type widening primitive conversion on hive/parquet tables diff --git a/data/files/parquet_type_promotion.txt b/data/files/parquet_type_promotion.txt index dc3e13eb733924dddb6ae38aad56f00697346d6a..a212d9cbab656b78cf9c3be01f5c427d3d635d6a 100644 --- a/data/files/parquet_type_promotion.txt +++ b/data/files/parquet_type_promotion.txt @@ -1,4 +1,4 @@ -100|5643|0.3|0.7|k1:11|7,17,22|10,20|k11:4.0|2.3,3.0,5.5|5.7,4.8 -200|5643|0.4|0.8|k2:14|8,17,24|20,20|v11:5.0|3.3,3.1,5.6|5.8,4.7 -300|7643|0.4|0.9|k3:12|9,17,25|30,60|b11:6.0|4.3,3.2,5.7|5.9,4.6 -400|8643|0.4|0.5|k4:15|7,18,27|50,70|d11:8.0|6.3,3.3,5.8|5.0,4.5 +100|150|155|5643|6666|0.3|0.7|k1:11|k1:71|7,17,22|9,11,13|10,20|k11:4.0|2.3,3.0,5.5|5.7,4.8 +200|250|255|5643|7777|0.4|0.8|k2:14|k2:13|8,17,24|15,17,19|20,20|v11:5.0|3.3,3.1,5.6|5.8,4.7 +300|350|355|7643|8888|0.4|0.9|k3:12|k3:19|9,17,25|21,23,25|30,60|b11:6.0|4.3,3.2,5.7|5.9,4.6 +400|450|455|8643|9999|0.4|0.5|k4:15|k4:23|7,18,27|27,29,31|50,70|d11:8.0|6.3,3.3,5.8|5.0,4.5 diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java index ca896407edbc8e523567c939d329bd7f253921e0..76d93b8e02a98c95da8a534f2820cd3e77b4bb43 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/convert/ETypeConverter.java @@ -21,6 +21,7 @@ import org.apache.hadoop.hive.conf.HiveConf; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime; import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils; +import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.io.DateWritable; import org.apache.hadoop.hive.serde2.io.DoubleWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -50,7 +51,6 @@ EDOUBLE_CONVERTER(Double.TYPE) { @Override - PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { return new PrimitiveConverter() { @Override @@ -93,27 +93,62 @@ public void addFloat(final float value) { }, EINT32_CONVERTER(Integer.TYPE) { @Override - PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { - if (hiveTypeInfo != null && hiveTypeInfo.equals(TypeInfoFactory.longTypeInfo)) { - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - parent.set(index, new LongWritable((long)value)); - } - }; - } else { - return new PrimitiveConverter() { - @Override - public void addInt(final int value) { - parent.set(index, new IntWritable(value)); - } - }; + PrimitiveConverter getConverter(final PrimitiveType type, final int index, + final ConverterParent parent, TypeInfo hiveTypeInfo) { + if (hiveTypeInfo != null) { + switch (hiveTypeInfo.getTypeName()) { + case serdeConstants.BIGINT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + parent.set(index, new LongWritable((long) value)); + } + }; + case serdeConstants.FLOAT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + parent.set(index, new FloatWritable((float) value)); + } + }; + case serdeConstants.DOUBLE_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + parent.set(index, new DoubleWritable((float) value)); + } + }; + } } + return new PrimitiveConverter() { + @Override + public void addInt(final int value) { + parent.set(index, new IntWritable(value)); + } + }; } }, EINT64_CONVERTER(Long.TYPE) { @Override PrimitiveConverter getConverter(final PrimitiveType type, final int index, final ConverterParent parent, TypeInfo hiveTypeInfo) { + if(hiveTypeInfo != null) { + switch(hiveTypeInfo.getTypeName()) { + case serdeConstants.FLOAT_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + parent.set(index, new FloatWritable(value)); + } + }; + case serdeConstants.DOUBLE_TYPE_NAME: + return new PrimitiveConverter() { + @Override + public void addLong(final long value) { + parent.set(index, new DoubleWritable(value)); + } + }; + } + } return new PrimitiveConverter() { @Override public void addLong(final long value) { diff --git a/ql/src/test/queries/clientpositive/parquet_columnar.q b/ql/src/test/queries/clientpositive/parquet_columnar.q index d6daeec577d3eda7b8bb122ea6b0b555c5261227..ca6da816b0eb4599b0f6298bf8da15bb9c2c7eea 100644 --- a/ql/src/test/queries/clientpositive/parquet_columnar.q +++ b/ql/src/test/queries/clientpositive/parquet_columnar.q @@ -32,3 +32,7 @@ SELECT * FROM parquet_columnar_access; ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 bigint, y1 int, f1 double); SELECT * FROM parquet_columnar_access; + +ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 float, y1 float, f1 double); + +SELECT * FROM parquet_columnar_access; \ No newline at end of file diff --git a/ql/src/test/queries/clientpositive/parquet_type_promotion.q b/ql/src/test/queries/clientpositive/parquet_type_promotion.q index c50221ddd1ae14ab11d960777b4e80503fdbd7eb..7edfc35e19a818246ac3a247f570e99a20012ec2 100644 --- a/ql/src/test/queries/clientpositive/parquet_type_promotion.q +++ b/ql/src/test/queries/clientpositive/parquet_type_promotion.q @@ -5,11 +5,16 @@ SET hive.metastore.disallow.incompatible.col.type.changes=false; CREATE TABLE parquet_type_promotion_staging ( cint int, + cint2 int, + cint3 int, clong bigint, + clong2 bigint, cfloat float, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, @@ -25,16 +30,20 @@ SELECT * FROM parquet_type_promotion_staging; CREATE TABLE parquet_type_promotion ( cint int, + cint2 int, + cint3 int, clong bigint, + clong2 bigint, cfloat float, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, fst1 struct - ) STORED AS PARQUET; INSERT OVERWRITE TABLE parquet_type_promotion @@ -44,11 +53,16 @@ SELECT * FROM parquet_type_promotion; ALTER TABLE parquet_type_promotion REPLACE COLUMNS( cint bigint, - clong bigint, + cint2 float, + cint3 double, + clong float, + clong2 double, cfloat double, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, diff --git a/ql/src/test/results/clientpositive/parquet_columnar.q.out b/ql/src/test/results/clientpositive/parquet_columnar.q.out index fd28f5ce241d0661a10c18430522ccbb6f0fc88f..a426d5eb3f46a4297c39e9112f199f8f809d0150 100644 --- a/ql/src/test/results/clientpositive/parquet_columnar.q.out +++ b/ql/src/test/results/clientpositive/parquet_columnar.q.out @@ -174,3 +174,40 @@ POSTHOOK: Input: default@parquet_columnar_access 1cde18 1 2 1.2999999523162842 1fgh19 2 3 1.399999976158142 1ijk20 3 4 1.0 +PREHOOK: query: ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 float, y1 float, f1 double) +PREHOOK: type: ALTERTABLE_REPLACECOLS +PREHOOK: Input: default@parquet_columnar_access +PREHOOK: Output: default@parquet_columnar_access +POSTHOOK: query: ALTER TABLE parquet_columnar_access REPLACE COLUMNS (s1 string, x1 float, y1 float, f1 double) +POSTHOOK: type: ALTERTABLE_REPLACECOLS +POSTHOOK: Input: default@parquet_columnar_access +POSTHOOK: Output: default@parquet_columnar_access +PREHOOK: query: SELECT * FROM parquet_columnar_access +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_columnar_access +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_columnar_access +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_columnar_access +#### A masked pattern was here #### +1abc00 1.0 2.0 1.0 +1def01 2.0 3.0 1.100000023841858 +1ghi02 3.0 4.0 1.2000000476837158 +1jkl03 1.0 2.0 1.2999999523162842 +1mno04 2.0 3.0 1.399999976158142 +1pqr05 3.0 4.0 1.0 +1stu06 1.0 2.0 1.100000023841858 +1vwx07 2.0 3.0 1.2000000476837158 +1yza08 3.0 4.0 1.2999999523162842 +1bcd09 1.0 2.0 1.399999976158142 +1efg10 2.0 3.0 1.0 +1hij11 3.0 4.0 1.100000023841858 +1klm12 1.0 2.0 1.2000000476837158 +1nop13 2.0 3.0 1.2999999523162842 +1qrs14 3.0 4.0 1.399999976158142 +1tuv15 1.0 2.0 1.0 +1wxy16 2.0 3.0 1.100000023841858 +1zab17 3.0 4.0 1.2000000476837158 +1cde18 1.0 2.0 1.2999999523162842 +1fgh19 2.0 3.0 1.399999976158142 +1ijk20 3.0 4.0 1.0 diff --git a/ql/src/test/results/clientpositive/parquet_type_promotion.q.out b/ql/src/test/results/clientpositive/parquet_type_promotion.q.out index 91c3fff57de418db975af18b779bc46e95a15501..71c271dc44587f84f5e65037414693f740b0de56 100644 --- a/ql/src/test/results/clientpositive/parquet_type_promotion.q.out +++ b/ql/src/test/results/clientpositive/parquet_type_promotion.q.out @@ -8,11 +8,16 @@ POSTHOOK: query: DROP TABLE parquet_type_promotion POSTHOOK: type: DROPTABLE PREHOOK: query: CREATE TABLE parquet_type_promotion_staging ( cint int, + cint2 int, + cint3 int, clong bigint, + clong2 bigint, cfloat float, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, @@ -26,11 +31,16 @@ PREHOOK: Output: database:default PREHOOK: Output: default@parquet_type_promotion_staging POSTHOOK: query: CREATE TABLE parquet_type_promotion_staging ( cint int, + cint2 int, + cint3 int, clong bigint, + clong2 bigint, cfloat float, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, @@ -58,38 +68,46 @@ POSTHOOK: query: SELECT * FROM parquet_type_promotion_staging POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_type_promotion_staging #### A masked pattern was here #### -100 5643 0.3 0.7 {"k1":11} [7,17,22] {"c1":10,"c2":20} {"k11":4.0} [2.3,3.0,5.5] {"c1":5.7,"c2":4.8} -200 5643 0.4 0.8 {"k2":14} [8,17,24] {"c1":20,"c2":20} {"v11":5.0} [3.3,3.1,5.6] {"c1":5.8,"c2":4.7} -300 7643 0.4 0.9 {"k3":12} [9,17,25] {"c1":30,"c2":60} {"b11":6.0} [4.3,3.2,5.7] {"c1":5.9,"c2":4.6} -400 8643 0.4 0.5 {"k4":15} [7,18,27] {"c1":50,"c2":70} {"d11":8.0} [6.3,3.3,5.8] {"c1":5.0,"c2":4.5} +100 150 155 5643 6666 0.3 0.7 {"k1":11} {"k1":71} [7,17,22] [9,11,13] {"c1":10,"c2":20} {"k11":4.0} [2.3,3.0,5.5] {"c1":5.7,"c2":4.8} +200 250 255 5643 7777 0.4 0.8 {"k2":14} {"k2":13} [8,17,24] [15,17,19] {"c1":20,"c2":20} {"v11":5.0} [3.3,3.1,5.6] {"c1":5.8,"c2":4.7} +300 350 355 7643 8888 0.4 0.9 {"k3":12} {"k3":19} [9,17,25] [21,23,25] {"c1":30,"c2":60} {"b11":6.0} [4.3,3.2,5.7] {"c1":5.9,"c2":4.6} +400 450 455 8643 9999 0.4 0.5 {"k4":15} {"k4":23} [7,18,27] [27,29,31] {"c1":50,"c2":70} {"d11":8.0} [6.3,3.3,5.8] {"c1":5.0,"c2":4.5} PREHOOK: query: CREATE TABLE parquet_type_promotion ( cint int, + cint2 int, + cint3 int, clong bigint, + clong2 bigint, cfloat float, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, fst1 struct - ) STORED AS PARQUET PREHOOK: type: CREATETABLE PREHOOK: Output: database:default PREHOOK: Output: default@parquet_type_promotion POSTHOOK: query: CREATE TABLE parquet_type_promotion ( cint int, + cint2 int, + cint3 int, clong bigint, + clong2 bigint, cfloat float, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, fst1 struct - ) STORED AS PARQUET POSTHOOK: type: CREATETABLE POSTHOOK: Output: database:default @@ -107,12 +125,17 @@ POSTHOOK: Output: default@parquet_type_promotion POSTHOOK: Lineage: parquet_type_promotion.cdouble SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:cdouble, type:double, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.cfloat SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:cfloat, type:float, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.cint SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_type_promotion.cint2 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:cint2, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_type_promotion.cint3 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:cint3, type:int, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.clong SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:clong, type:bigint, comment:null), ] +POSTHOOK: Lineage: parquet_type_promotion.clong2 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:clong2, type:bigint, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.fl1 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:fl1, type:array, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.fm1 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:fm1, type:map, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.fst1 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:fst1, type:struct, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.l1 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:l1, type:array, comment:null), ] +POSTHOOK: Lineage: parquet_type_promotion.l2 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:l2, type:array, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.m1 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:m1, type:map, comment:null), ] +POSTHOOK: Lineage: parquet_type_promotion.m2 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:m2, type:map, comment:null), ] POSTHOOK: Lineage: parquet_type_promotion.st1 SIMPLE [(parquet_type_promotion_staging)parquet_type_promotion_staging.FieldSchema(name:st1, type:struct, comment:null), ] PREHOOK: query: SELECT * FROM parquet_type_promotion PREHOOK: type: QUERY @@ -122,17 +145,22 @@ POSTHOOK: query: SELECT * FROM parquet_type_promotion POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_type_promotion #### A masked pattern was here #### -100 5643 0.3 0.7 {"k1":11} [7,17,22] {"c1":10,"c2":20} {"k11":4.0} [2.3,3.0,5.5] {"c1":5.7,"c2":4.8} -200 5643 0.4 0.8 {"k2":14} [8,17,24] {"c1":20,"c2":20} {"v11":5.0} [3.3,3.1,5.6] {"c1":5.8,"c2":4.7} -300 7643 0.4 0.9 {"k3":12} [9,17,25] {"c1":30,"c2":60} {"b11":6.0} [4.3,3.2,5.7] {"c1":5.9,"c2":4.6} -400 8643 0.4 0.5 {"k4":15} [7,18,27] {"c1":50,"c2":70} {"d11":8.0} [6.3,3.3,5.8] {"c1":5.0,"c2":4.5} +100 150 155 5643 6666 0.3 0.7 {"k1":11} {"k1":71} [7,17,22] [9,11,13] {"c1":10,"c2":20} {"k11":4.0} [2.3,3.0,5.5] {"c1":5.7,"c2":4.8} +200 250 255 5643 7777 0.4 0.8 {"k2":14} {"k2":13} [8,17,24] [15,17,19] {"c1":20,"c2":20} {"v11":5.0} [3.3,3.1,5.6] {"c1":5.8,"c2":4.7} +300 350 355 7643 8888 0.4 0.9 {"k3":12} {"k3":19} [9,17,25] [21,23,25] {"c1":30,"c2":60} {"b11":6.0} [4.3,3.2,5.7] {"c1":5.9,"c2":4.6} +400 450 455 8643 9999 0.4 0.5 {"k4":15} {"k4":23} [7,18,27] [27,29,31] {"c1":50,"c2":70} {"d11":8.0} [6.3,3.3,5.8] {"c1":5.0,"c2":4.5} PREHOOK: query: ALTER TABLE parquet_type_promotion REPLACE COLUMNS( cint bigint, - clong bigint, + cint2 float, + cint3 double, + clong float, + clong2 double, cfloat double, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, @@ -143,11 +171,16 @@ PREHOOK: Input: default@parquet_type_promotion PREHOOK: Output: default@parquet_type_promotion POSTHOOK: query: ALTER TABLE parquet_type_promotion REPLACE COLUMNS( cint bigint, - clong bigint, + cint2 float, + cint3 double, + clong float, + clong2 double, cfloat double, cdouble double, m1 map, + m2 map, l1 array, + l2 array, st1 struct, fm1 map, fl1 array, @@ -164,10 +197,10 @@ POSTHOOK: query: SELECT * FROM parquet_type_promotion POSTHOOK: type: QUERY POSTHOOK: Input: default@parquet_type_promotion #### A masked pattern was here #### -100 5643 0.30000001192092896 0.7 {"k1":11} [7,17,22] {"c1":10,"c2":20} {"k11":4.0} [2.299999952316284,3.0,5.5] {"c1":5.699999809265137,"c2":4.8} -200 5643 0.4000000059604645 0.8 {"k2":14} [8,17,24] {"c1":20,"c2":20} {"v11":5.0} [3.299999952316284,3.0999999046325684,5.599999904632568] {"c1":5.800000190734863,"c2":4.7} -300 7643 0.4000000059604645 0.9 {"k3":12} [9,17,25] {"c1":30,"c2":60} {"b11":6.0} [4.300000190734863,3.200000047683716,5.699999809265137] {"c1":5.900000095367432,"c2":4.6} -400 8643 0.4000000059604645 0.5 {"k4":15} [7,18,27] {"c1":50,"c2":70} {"d11":8.0} [6.300000190734863,3.299999952316284,5.800000190734863] {"c1":5.0,"c2":4.5} +100 150.0 155.0 5643.0 6666.0 0.30000001192092896 0.7 {"k1":11} {"k1":71.0} [7,17,22] [9.0,11.0,13.0] {"c1":10,"c2":20} {"k11":4.0} [2.299999952316284,3.0,5.5] {"c1":5.699999809265137,"c2":4.8} +200 250.0 255.0 5643.0 7777.0 0.4000000059604645 0.8 {"k2":14} {"k2":13.0} [8,17,24] [15.0,17.0,19.0] {"c1":20,"c2":20} {"v11":5.0} [3.299999952316284,3.0999999046325684,5.599999904632568] {"c1":5.800000190734863,"c2":4.7} +300 350.0 355.0 7643.0 8888.0 0.4000000059604645 0.9 {"k3":12} {"k3":19.0} [9,17,25] [21.0,23.0,25.0] {"c1":30,"c2":60} {"b11":6.0} [4.300000190734863,3.200000047683716,5.699999809265137] {"c1":5.900000095367432,"c2":4.6} +400 450.0 455.0 8643.0 9999.0 0.4000000059604645 0.5 {"k4":15} {"k4":23.0} [7,18,27] [27.0,29.0,31.0] {"c1":50,"c2":70} {"d11":8.0} [6.300000190734863,3.299999952316284,5.800000190734863] {"c1":5.0,"c2":4.5} PREHOOK: query: -- This test covers the case where array> data -- can be retrieved useing map. -- This also test if there are more than 2 fields in array_of_struct