diff --git a/data/files/parquet_types.txt b/data/files/parquet_types.txt index 27ee4fb..c712b7c 100644 --- a/data/files/parquet_types.txt +++ b/data/files/parquet_types.txt @@ -1,22 +1,22 @@ -100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |B4F3CAFDBEDD|k1:v1|101,200|10,abc|2011-01-01 -101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |68692CCAC0BDE7|k2:v2|102,200|10,def|2012-02-02 -102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|B4F3CAFDBEDD|k3:v3|103,200|10,ghi|2013-03-03 -103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|68692CCAC0BDE7|k4:v4|104,200|10,jkl|2014-04-04 -104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|B4F3CAFDBEDD|k5:v5|105,200|10,mno|2015-05-05 -105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|68692CCAC0BDE7|k6:v6|106,200|10,pqr|2016-06-06 -106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|B4F3CAFDBEDD|k7:v7|107,200|10,stu|2017-07-07 -107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|68692CCAC0BDE7|k8:v8|108,200|10,vwx|2018-08-08 -108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|B4F3CAFDBEDD|68656C6C6F|k9:v9|109,200|10,yza|2019-09-09 -109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|68692CCAC0BDE7|k10:v10|110,200|10,bcd|2020-10-10 -110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|B4F3CAFDBEDD|k11:v11|111,200|10,efg|2021-11-11 -111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|68692CCAC0BDE7|k12:v12|112,200|10,hij|2022-12-12 -112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|B4F3CAFDBEDD|k13:v13|113,200|10,klm|2023-01-02 -113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|68692CCAC0BDE7|k14:v14|114,200|10,nop|2024-02-02 -114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|B4F3CAFDBEDD|k15:v15|115,200|10,qrs|2025-03-03 -115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|68692CCAC0BDE7|k16:v16|116,200|10,qrs|2026-04-04 -116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|B4F3CAFDBEDD|k17:v17|117,200|10,wxy|2027-05-05 -117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|68692CCAC0BDE7|k18:v18|118,200|10,zab|2028-06-06 -118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|B4F3CAFDBEDD|k19:v19|119,200|10,cde|2029-07-07 -119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|68692CCAC0BDE7|k20:v20|120,200|10,fgh|2030-08-08 -120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|B4F3CAFDBEDD|k21:v21|121,200|10,ijk|2031-09-09 -121|1|2|1.1|6.3|lmn|2032-10-10 22:22:22.222222222|bcdef|abcde||k22:v22|122,200|10,lmn|2032-10-10 \ No newline at end of file +100|1|1|1.0|0.0|abc|2011-01-01 01:01:01.111111111|a |a |B4F3CAFDBEDD|k1:v1|101,200|10,abc|2011-01-01|48.88 +101|2|2|1.1|0.3|def|2012-02-02 02:02:02.222222222|ab |ab |68692CCAC0BDE7|k2:v2|102,200|10,def|2012-02-02|8.72 +102|3|3|1.2|0.6|ghi|2013-03-03 03:03:03.333333333|abc|abc|B4F3CAFDBEDD|k3:v3|103,200|10,ghi|2013-03-03|90.21 +103|1|4|1.3|0.9|jkl|2014-04-04 04:04:04.444444444|abcd|abcd|68692CCAC0BDE7|k4:v4|104,200|10,jkl|2014-04-04|3.89 +104|2|5|1.4|1.2|mno|2015-05-05 05:05:05.555555555|abcde|abcde|B4F3CAFDBEDD|k5:v5|105,200|10,mno|2015-05-05|56.23 +105|3|1|1.0|1.5|pqr|2016-06-06 06:06:06.666666666|abcdef|abcdef|68692CCAC0BDE7|k6:v6|106,200|10,pqr|2016-06-06|90.21 +106|1|2|1.1|1.8|stu|2017-07-07 07:07:07.777777777|abcdefg|abcdefg|B4F3CAFDBEDD|k7:v7|107,200|10,stu|2017-07-07|6.09 +107|2|3|1.2|2.1|vwx|2018-08-08 08:08:08.888888888|bcdefg|abcdefgh|68692CCAC0BDE7|k8:v8|108,200|10,vwx|2018-08-08|9.44 +108|3|4|1.3|2.4|yza|2019-09-09 09:09:09.999999999|cdefg|B4F3CAFDBEDD|68656C6C6F|k9:v9|109,200|10,yza|2019-09-09|77.54 +109|1|5|1.4|2.7|bcd|2020-10-10 10:10:10.101010101|klmno|abcdedef|68692CCAC0BDE7|k10:v10|110,200|10,bcd|2020-10-10|25.42 +110|2|1|1.0|3.0|efg|2021-11-11 11:11:11.111111111|pqrst|abcdede|B4F3CAFDBEDD|k11:v11|111,200|10,efg|2021-11-11|60.12 +111|3|2|1.1|3.3|hij|2022-12-12 12:12:12.121212121|nopqr|abcded|68692CCAC0BDE7|k12:v12|112,200|10,hij|2022-12-12|49.56 +112|1|3|1.2|3.6|klm|2023-01-02 13:13:13.131313131|opqrs|abcdd|B4F3CAFDBEDD|k13:v13|113,200|10,klm|2023-01-02|80.76 +113|2|4|1.3|3.9|nop|2024-02-02 14:14:14.141414141|pqrst|abc|68692CCAC0BDE7|k14:v14|114,200|10,nop|2024-02-02|23.23 +114|3|5|1.4|4.2|qrs|2025-03-03 15:15:15.151515151|qrstu|b|B4F3CAFDBEDD|k15:v15|115,200|10,qrs|2025-03-03|1.01 +115|1|1|1.0|4.5|qrs|2026-04-04 16:16:16.161616161|rstuv|abcded|68692CCAC0BDE7|k16:v16|116,200|10,qrs|2026-04-04|5.98 +116|2|2|1.1|4.8|wxy|2027-05-05 17:17:17.171717171|stuvw|abcded|B4F3CAFDBEDD|k17:v17|117,200|10,wxy|2027-05-05|11.22 +117|3|3|1.2|5.1|zab|2028-06-06 18:18:18.181818181|tuvwx|abcded|68692CCAC0BDE7|k18:v18|118,200|10,zab|2028-06-06|9.88 +118|1|4|1.3|5.4|cde|2029-07-07 19:19:19.191919191|uvwzy|abcdede|B4F3CAFDBEDD|k19:v19|119,200|10,cde|2029-07-07|4.76 +119|2|5|1.4|5.7|fgh|2030-08-08 20:20:20.202020202|vwxyz|abcdede|68692CCAC0BDE7|k20:v20|120,200|10,fgh|2030-08-08|12.83 +120|3|1|1.0|6.0|ijk|2031-09-09 21:21:21.212121212|wxyza|abcde|B4F3CAFDBEDD|k21:v21|121,200|10,ijk|2031-09-09|73.04 +121|1|2|1.1|6.3|lmn|2032-10-10 22:22:22.222222222|bcdef|abcde||k22:v22|122,200|10,lmn|2032-10-10|90.33 \ No newline at end of file diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java index 681f803..befe2fc 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/vector/VectorColumnAssignFactory.java @@ -573,6 +573,12 @@ public void assignObjectValue(Object val, int destIndex) throws HiveException { vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.INTERVAL_DAY_TIME); } else if (writables[i] instanceof BooleanWritable) { vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.BOOLEAN); + } else if (writables[i] instanceof HiveDecimalWritable) { + vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.DECIMAL); + } else if (writables[i] instanceof HiveCharWritable) { + vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.CHAR); + } else if (writables[i] instanceof HiveVarcharWritable) { + vcas[i] = buildObjectAssign(outputBatch, i, PrimitiveCategory.VARCHAR); } else { throw new HiveException("Unimplemented vector assigner for writable type " + writables[i].getClass()); diff --git a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java index d7edd52..3bb3eb1 100644 --- a/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java +++ b/ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java @@ -109,7 +109,9 @@ public float getProgress() throws IOException { @Override public boolean next(NullWritable key, VectorizedRowBatch outputBatch) throws IOException { - assert(outputBatch.numCols == assigners.length); + if (assigners != null) { + assert(outputBatch.numCols == assigners.length); + } outputBatch.reset(); int maxSize = outputBatch.getMaxSize(); try { diff --git a/ql/src/test/queries/clientpositive/vectorized_parquet_types.q b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q new file mode 100644 index 0000000..0aaa432 --- /dev/null +++ b/ql/src/test/queries/clientpositive/vectorized_parquet_types.q @@ -0,0 +1,82 @@ +SET hive.vectorized.execution.enabled=true; + +DROP TABLE parquet_types_staging; +DROP TABLE parquet_types; + +-- init +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date, + cdecimal decimal(4,2) +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':'; + +CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + cdecimal decimal(4,2) +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging; + +INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), cdecimal FROM parquet_types_staging; + +-- select +explain +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types; + +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types; + +explain +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; + +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types; + +explain +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint; + +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint; \ No newline at end of file diff --git a/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out b/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out new file mode 100644 index 0000000..8d1bddc --- /dev/null +++ b/ql/src/test/results/clientpositive/vectorized_parquet_types.q.out @@ -0,0 +1,378 @@ +PREHOOK: query: DROP TABLE parquet_types_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_types +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types +POSTHOOK: type: DROPTABLE +PREHOOK: query: -- init +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date, + cdecimal decimal(4,2) +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: -- init +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date, + cdecimal decimal(4,2) +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + cdecimal decimal(4,2) +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types +POSTHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + cdecimal decimal(4,2) +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging +POSTHOOK: type: LOAD +#### A masked pattern was here #### +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), cdecimal FROM parquet_types_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_types +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), cdecimal FROM parquet_types_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_types +POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ] +POSTHOOK: Lineage: parquet_types.cdecimal SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdecimal, type:decimal(4,2), comment:null), ] +POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] +PREHOOK: query: -- select +explain +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +PREHOOK: type: QUERY +POSTHOOK: query: -- select +explain +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cint (type: int), ctinyint (type: tinyint), csmallint (type: smallint), cfloat (type: float), cdouble (type: double), cstring1 (type: string), t (type: timestamp), cchar (type: char(5)), cvarchar (type: varchar(10)), hex(cbinary) (type: string), cdecimal (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +hex(cbinary), cdecimal FROM parquet_types +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD 48.88 +101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 8.72 +102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD 90.21 +103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd 68692CCAC0BDE7 3.89 +104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde B4F3CAFDBEDD 56.23 +105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef 68692CCAC0BDE7 90.21 +106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg B4F3CAFDBEDD 6.09 +107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh 68692CCAC0BDE7 9.44 +108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg B4F3CAFDBE 68656C6C6F 77.54 +109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef 68692CCAC0BDE7 25.42 +110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede B4F3CAFDBEDD 60.12 +111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded 68692CCAC0BDE7 49.56 +112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd B4F3CAFDBEDD 80.76 +113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc 68692CCAC0BDE7 23.23 +114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b B4F3CAFDBEDD 1.01 +115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded 68692CCAC0BDE7 5.98 +116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded B4F3CAFDBEDD 11.22 +117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded 68692CCAC0BDE7 9.88 +118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede B4F3CAFDBEDD 4.76 +119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 12.83 +120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD 73.04 +121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde 90.33 +PREHOOK: query: explain +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-0 depends on stages: Stage-1 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cchar (type: char(5)), length(cchar) (type: int), cvarchar (type: varchar(10)), length(cvarchar) (type: int), cdecimal (type: decimal(4,2)), sign(cdecimal) (type: int) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cchar, LENGTH(cchar), cvarchar, LENGTH(cvarchar), cdecimal, SIGN(cdecimal) FROM parquet_types +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +a 1 a 3 48.88 1 +ab 2 ab 3 8.72 1 +abc 3 abc 3 90.21 1 +abcd 4 abcd 4 3.89 1 +abcde 5 abcde 5 56.23 1 +abcde 5 abcdef 6 90.21 1 +abcde 5 abcdefg 7 6.09 1 +bcdef 5 abcdefgh 8 9.44 1 +cdefg 5 B4F3CAFDBE 10 77.54 1 +klmno 5 abcdedef 8 25.42 1 +pqrst 5 abcdede 7 60.12 1 +nopqr 5 abcded 6 49.56 1 +opqrs 5 abcdd 5 80.76 1 +pqrst 5 abc 3 23.23 1 +qrstu 5 b 1 1.01 1 +rstuv 5 abcded 6 5.98 1 +stuvw 5 abcded 6 11.22 1 +tuvwx 5 abcded 6 9.88 1 +uvwzy 5 abcdede 7 4.76 1 +vwxyz 5 abcdede 7 12.83 1 +wxyza 5 abcde 5 73.04 1 +bcdef 5 abcde 5 90.33 1 +PREHOOK: query: explain +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: explain +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double), cdecimal (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(_col1), min(_col2), count(_col3), avg(_col4), stddev_pop(_col5), max(_col6) + keys: _col0 (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct), _col6 (type: decimal(4,2)) + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double), _col6 (type: decimal(4,2)) + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double), VALUE._col5 (type: decimal(4,2)) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6 + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 11 Data size: 121 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.TextInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + AVG(cfloat), + STDDEV_POP(cdouble), + MAX(cdecimal) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +1 121 1 8 1.1749999970197678 2.0621590627301285 90.33 +2 119 1 7 1.2142857142857142 1.8 60.12 +3 120 1 7 1.171428578240531 1.7999999999999996 90.21