diff --git data/files/parquet_non_dictionary_types.txt data/files/parquet_non_dictionary_types.txt new file mode 100644 index 0000000..92561f8 --- /dev/null +++ data/files/parquet_non_dictionary_types.txt @@ -0,0 +1,300 @@ +1000|-128|0|0|0.0|0.3||1940-01-01 01:01:01.111111111||||:|1000,1001|1000,b|1940-01-01 +1001|-127|1|3000|0.3|1.3|b|1941-02-02 01:01:01.111111111|b|b|b|b:b|1001,1002|1001,c|1941-02-02 +1002|-126|2|6000|0.6|2.3|c|1942-03-03 01:01:01.111111111|c|c|c|c:c|1002,1003|1002,d|1942-03-03 +1003|-125|3|9000|0.9|3.3|d|1943-04-04 01:01:01.111111111|d|d|d|d:d|1003,1004|1003,e|1943-04-04 +1004|-124|4|12000|1.2|4.3|e|1944-05-05 01:01:01.111111111|e|e|e|e:e|1004,1005|1004,f|1944-05-05 +1005|-123|5|15000|1.5|5.3|f|1945-06-06 01:01:01.111111111|f|f|f|f:f|1005,1006|1005,g|1945-06-06 +1006|-122|6|18000|1.8|6.3|g|1946-07-07 01:01:01.111111111|g|g|g|g:g|1006,1007|1006,h|1946-07-07 +1007|-121|7|21000|2.1|7.3|h|1947-08-08 01:01:01.111111111|h|h|h|h:h|1007,1008|1007,i|1947-08-08 +1008|-120|8|24000|2.4|8.3|i|1948-09-09 01:01:01.111111111|i|i|i|i:i|1008,1009|1008,j|1948-09-09 +1009|-119|9|27000|2.7|9.3|j|1949-10-10 01:01:01.111111111|j|j|j|j:j|1009,1010|1009,k|1949-10-10 +1010|-118|10|30000|3.0|10.3|k|1950-11-11 01:01:01.111111111|k|k|k|k:k|1010,1011|1010,l|1950-11-11 +1011|-117|11|33000|3.3|11.3|l|1951-12-12 01:01:01.111111111|l|l|l|l:l|1011,1012|1011,m|1951-12-12 +1012|-116|12|36000|3.6|12.3|m|1952-01-013 01:01:01.111111111|m|m|m|m:m|1012,1013|1012,n|1952-01-013 +1013|-115|13|39000|3.9|13.3|n|1953-02-014 01:01:01.111111111|n|n|n|n:n|1013,1014|1013,o|1953-02-014 +1014|-114|14|42000|4.2|14.3|o|1954-03-015 01:01:01.111111111|o|o|o|o:o|1014,1015|1014,p|1954-03-015 +1015|-113|15|45000|4.5|15.3|p|1955-04-016 01:01:01.111111111|p|p|p|p:p|1015,1016|1015,q|1955-04-016 +1016|-112|16|48000|4.8|16.3|q|1956-05-017 01:01:01.111111111|q|q|q|q:q|1016,1017|1016,r|1956-05-017 +1017|-111|17|51000|5.1|17.3|r|1957-06-018 01:01:01.111111111|r|r|r|r:r|1017,1018|1017,s|1957-06-018 +1018|-110|18|54000|5.4|18.3|s|1958-07-019 01:01:01.111111111|s|s|s|s:s|1018,1019|1018,t|1958-07-019 +1019|-109|19|57000|5.7|19.3|t|1959-08-020 01:01:01.111111111|t|t|t|t:t|1019,1020|1019,u|1959-08-020 +1020|-108|20|60000|6.0|20.3|u|1960-09-021 01:01:01.111111111|u|u|u|u:u|1020,1021|1020,v|1960-09-021 +1021|-107|21|63000|6.3|21.3|v|1961-10-22 01:01:01.111111111|v|v|v|v:v|1021,1022|1021,w|1961-10-22 +1022|-106|22|66000|6.6|22.3|w|1962-11-23 01:01:01.111111111|w|w|w|w:w|1022,1023|1022,x|1962-11-23 +1023|-105|23|69000|6.9|23.3|x|1963-12-24 01:01:01.111111111|x|x|x|x:x|1023,1024|1023,y|1963-12-24 +1024|-104|24|72000|7.2|24.3|y|1964-01-025 01:01:01.111111111|y|y|y|y:y|1024,1025|1024,z|1964-01-025 +1025|-103|25|75000|7.5|25.3|z|1965-02-026 01:01:01.111111111|z|z|z|z:z|1025,1026|1025,ba|1965-02-026 +1026|-102|26|78000|7.8|26.3|ba|1966-03-027 01:01:01.111111111|ba|ba|ba|ba:ba|1026,1027|1026,bb|1966-03-027 +1027|-101|27|81000|8.1|27.3|bb|1967-04-01 01:01:01.111111111|bb|bb|bb|bb:bb|1027,1028|1027,bc|1967-04-01 +1028|-100|28|84000|8.4|28.3|bc|1968-05-02 01:01:01.111111111|bc|bc|bc|bc:bc|1028,1029|1028,bd|1968-05-02 +1029|-99|29|87000|8.7|29.3|bd|1969-06-03 01:01:01.111111111|bd|bd|bd|bd:bd|1029,1030|1029,be|1969-06-03 +1030|-98|30|90000|9.0|30.3|be|1970-07-04 01:01:01.111111111|be|be|be|be:be|1030,1031|1030,bf|1970-07-04 +1031|-97|31|93000|9.3|31.3|bf|1971-08-05 01:01:01.111111111|bf|bf|bf|bf:bf|1031,1032|1031,bg|1971-08-05 +1032|-96|32|96000|9.6|32.3|bg|1972-09-06 01:01:01.111111111|bg|bg|bg|bg:bg|1032,1033|1032,bh|1972-09-06 
+1033|-95|33|99000|9.9|33.3|bh|1973-10-7 01:01:01.111111111|bh|bh|bh|bh:bh|1033,1034|1033,bi|1973-10-7 +1034|-94|34|102000|10.2|34.3|bi|1974-11-8 01:01:01.111111111|bi|bi|bi|bi:bi|1034,1035|1034,bj|1974-11-8 +1035|-93|35|105000|10.5|35.3|bj|1975-12-9 01:01:01.111111111|bj|bj|bj|bj:bj|1035,1036|1035,bk|1975-12-9 +1036|-92|36|108000|10.8|36.3|bk|1976-01-010 01:01:01.111111111|bk|bk|bk|bk:bk|1036,1037|1036,bl|1976-01-010 +1037|-91|37|111000|11.1|37.3|bl|1977-02-011 01:01:01.111111111|bl|bl|bl|bl:bl|1037,1038|1037,bm|1977-02-011 +1038|-90|38|114000|11.4|38.3|bm|1978-03-012 01:01:01.111111111|bm|bm|bm|bm:bm|1038,1039|1038,bn|1978-03-012 +1039|-89|39|117000|11.7|39.3|bn|1979-04-013 01:01:01.111111111|bn|bn|bn|bn:bn|1039,1040|1039,bo|1979-04-013 +1040|-88|40|120000|12.0|40.3|bo|1980-05-014 01:01:01.111111111|bo|bo|bo|bo:bo|1040,1041|1040,bp|1980-05-014 +1041|-87|41|123000|12.3|41.3|bp|1981-06-015 01:01:01.111111111|bp|bp|bp|bp:bp|1041,1042|1041,bq|1981-06-015 +1042|-86|42|126000|12.6|42.3|bq|1982-07-016 01:01:01.111111111|bq|bq|bq|bq:bq|1042,1043|1042,br|1982-07-016 +1043|-85|43|129000|12.9|43.3|br|1983-08-017 01:01:01.111111111|br|br|br|br:br|1043,1044|1043,bs|1983-08-017 +1044|-84|44|132000|13.2|44.3|bs|1984-09-018 01:01:01.111111111|bs|bs|bs|bs:bs|1044,1045|1044,bt|1984-09-018 +1045|-83|45|135000|13.5|45.3|bt|1985-10-19 01:01:01.111111111|bt|bt|bt|bt:bt|1045,1046|1045,bu|1985-10-19 +1046|-82|46|138000|13.8|46.3|bu|1986-11-20 01:01:01.111111111|bu|bu|bu|bu:bu|1046,1047|1046,bv|1986-11-20 +1047|-81|47|141000|14.1|47.3|bv|1987-12-21 01:01:01.111111111|bv|bv|bv|bv:bv|1047,1048|1047,bw|1987-12-21 +1048|-80|48|144000|14.4|48.3|bw|1988-01-022 01:01:01.111111111|bw|bw|bw|bw:bw|1048,1049|1048,bx|1988-01-022 +1049|-79|49|147000|14.7|49.3|bx|1989-02-023 01:01:01.111111111|bx|bx|bx|bx:bx|1049,1050|1049,by|1989-02-023 +1050|-78|50|150000|15.0|50.3|by|1990-03-024 01:01:01.111111111|by|by|by|by:by|1050,1051|1050,bz|1990-03-024 +1051|-77|51|153000|15.3|51.3|bz|1991-04-025 01:01:01.111111111|bz|bz|bz|bz:bz|1051,1052|1051,ca|1991-04-025 +1052|-76|52|156000|15.6|52.3|ca|1992-05-026 01:01:01.111111111|ca|ca|ca|ca:ca|1052,1053|1052,cb|1992-05-026 +1053|-75|53|159000|15.9|53.3|cb|1993-06-027 01:01:01.111111111|cb|cb|cb|cb:cb|1053,1054|1053,cc|1993-06-027 +1054|-74|54|162000|16.2|54.3|cc|1994-07-01 01:01:01.111111111|cc|cc|cc|cc:cc|1054,1055|1054,cd|1994-07-01 +1055|-73|55|165000|16.5|55.3|cd|1995-08-02 01:01:01.111111111|cd|cd|cd|cd:cd|1055,1056|1055,ce|1995-08-02 +1056|-72|56|168000|16.8|56.3|ce|1996-09-03 01:01:01.111111111|ce|ce|ce|ce:ce|1056,1057|1056,cf|1996-09-03 +1057|-71|57|171000|17.1|57.3|cf|1997-10-4 01:01:01.111111111|cf|cf|cf|cf:cf|1057,1058|1057,cg|1997-10-4 +1058|-70|58|174000|17.4|58.3|cg|1998-11-5 01:01:01.111111111|cg|cg|cg|cg:cg|1058,1059|1058,ch|1998-11-5 +1059|-69|59|177000|17.7|59.3|ch|1999-12-6 01:01:01.111111111|ch|ch|ch|ch:ch|1059,1060|1059,ci|1999-12-6 +1060|-68|60|180000|18.0|60.3|ci|2000-01-07 01:01:01.111111111|ci|ci|ci|ci:ci|1060,1061|1060,cj|2000-01-07 +1061|-67|61|183000|18.3|61.3|cj|2001-02-08 01:01:01.111111111|cj|cj|cj|cj:cj|1061,1062|1061,ck|2001-02-08 +1062|-66|62|186000|18.6|62.3|ck|2002-03-09 01:01:01.111111111|ck|ck|ck|ck:ck|1062,1063|1062,cl|2002-03-09 +1063|-65|63|189000|18.9|63.3|cl|2003-04-010 01:01:01.111111111|cl|cl|cl|cl:cl|1063,1064|1063,cm|2003-04-010 +1064|-64|64|192000|19.2|64.3|cm|2004-05-011 01:01:01.111111111|cm|cm|cm|cm:cm|1064,1065|1064,cn|2004-05-011 +1065|-63|65|195000|19.5|65.3|cn|2005-06-012 01:01:01.111111111|cn|cn|cn|cn:cn|1065,1066|1065,co|2005-06-012 
+1066|-62|66|198000|19.8|66.3|co|2006-07-013 01:01:01.111111111|co|co|co|co:co|1066,1067|1066,cp|2006-07-013 +1067|-61|67|201000|20.1|67.3|cp|2007-08-014 01:01:01.111111111|cp|cp|cp|cp:cp|1067,1068|1067,cq|2007-08-014 +1068|-60|68|204000|20.4|68.3|cq|2008-09-015 01:01:01.111111111|cq|cq|cq|cq:cq|1068,1069|1068,cr|2008-09-015 +1069|-59|69|207000|20.7|69.3|cr|2009-10-16 01:01:01.111111111|cr|cr|cr|cr:cr|1069,1070|1069,cs|2009-10-16 +1070|-58|70|210000|21.0|70.3|cs|2010-11-17 01:01:01.111111111|cs|cs|cs|cs:cs|1070,1071|1070,ct|2010-11-17 +1071|-57|71|213000|21.3|71.3|ct|2011-12-18 01:01:01.111111111|ct|ct|ct|ct:ct|1071,1072|1071,cu|2011-12-18 +1072|-56|72|216000|21.6|72.3|cu|2012-01-019 01:01:01.111111111|cu|cu|cu|cu:cu|1072,1073|1072,cv|2012-01-019 +1073|-55|73|219000|21.9|73.3|cv|2013-02-020 01:01:01.111111111|cv|cv|cv|cv:cv|1073,1074|1073,cw|2013-02-020 +1074|-54|74|222000|22.2|74.3|cw|2014-03-021 01:01:01.111111111|cw|cw|cw|cw:cw|1074,1075|1074,cx|2014-03-021 +1075|-53|75|225000|22.5|75.3|cx|2015-04-022 01:01:01.111111111|cx|cx|cx|cx:cx|1075,1076|1075,cy|2015-04-022 +1076|-52|76|228000|22.8|76.3|cy|2016-05-023 01:01:01.111111111|cy|cy|cy|cy:cy|1076,1077|1076,cz|2016-05-023 +1077|-51|77|231000|23.1|77.3|cz|2017-06-024 01:01:01.111111111|cz|cz|cz|cz:cz|1077,1078|1077,da|2017-06-024 +1078|-50|78|234000|23.4|78.3|da|2018-07-025 01:01:01.111111111|da|da|da|da:da|1078,1079|1078,db|2018-07-025 +1079|-49|79|237000|23.7|79.3|db|2019-08-026 01:01:01.111111111|db|db|db|db:db|1079,1080|1079,dc|2019-08-026 +1080|-48|80|240000|24.0|80.3|dc|2020-09-027 01:01:01.111111111|dc|dc|dc|dc:dc|1080,1081|1080,dd|2020-09-027 +1081|-47|81|243000|24.3|81.3|dd|2021-10-1 01:01:01.111111111|dd|dd|dd|dd:dd|1081,1082|1081,de|2021-10-1 +1082|-46|82|246000|24.6|82.3|de|2022-11-2 01:01:01.111111111|de|de|de|de:de|1082,1083|1082,df|2022-11-2 +1083|-45|83|249000|24.9|83.3|df|2023-12-3 01:01:01.111111111|df|df|df|df:df|1083,1084|1083,dg|2023-12-3 +1084|-44|84|252000|25.2|84.3|dg|2024-01-04 01:01:01.111111111|dg|dg|dg|dg:dg|1084,1085|1084,dh|2024-01-04 +1085|-43|85|255000|25.5|85.3|dh|2025-02-05 01:01:01.111111111|dh|dh|dh|dh:dh|1085,1086|1085,di|2025-02-05 +1086|-42|86|258000|25.8|86.3|di|2026-03-06 01:01:01.111111111|di|di|di|di:di|1086,1087|1086,dj|2026-03-06 +1087|-41|87|261000|26.1|87.3|dj|2027-04-07 01:01:01.111111111|dj|dj|dj|dj:dj|1087,1088|1087,dk|2027-04-07 +1088|-40|88|264000|26.4|88.3|dk|2028-05-08 01:01:01.111111111|dk|dk|dk|dk:dk|1088,1089|1088,dl|2028-05-08 +1089|-39|89|267000|26.7|89.3|dl|2029-06-09 01:01:01.111111111|dl|dl|dl|dl:dl|1089,1090|1089,dm|2029-06-09 +1090|-38|90|270000|27.0|90.3|dm|2030-07-010 01:01:01.111111111|dm|dm|dm|dm:dm|1090,1091|1090,dn|2030-07-010 +1091|-37|91|273000|27.3|91.3|dn|2031-08-011 01:01:01.111111111|dn|dn|dn|dn:dn|1091,1092|1091,do|2031-08-011 +1092|-36|92|276000|27.6|92.3|do|2032-09-012 01:01:01.111111111|do|do|do|do:do|1092,1093|1092,dp|2032-09-012 +1093|-35|93|279000|27.9|93.3|dp|2033-10-13 01:01:01.111111111|dp|dp|dp|dp:dp|1093,1094|1093,dq|2033-10-13 +1094|-34|94|282000|28.2|94.3|dq|2034-11-14 01:01:01.111111111|dq|dq|dq|dq:dq|1094,1095|1094,dr|2034-11-14 +1095|-33|95|285000|28.5|95.3|dr|2035-12-15 01:01:01.111111111|dr|dr|dr|dr:dr|1095,1096|1095,ds|2035-12-15 +1096|-32|96|288000|28.8|96.3|ds|2036-01-016 01:01:01.111111111|ds|ds|ds|ds:ds|1096,1097|1096,dt|2036-01-016 +1097|-31|97|291000|29.1|97.3|dt|2037-02-017 01:01:01.111111111|dt|dt|dt|dt:dt|1097,1098|1097,du|2037-02-017 +1098|-30|98|294000|29.4|98.3|du|2038-03-018 
01:01:01.111111111|du|du|du|du:du|1098,1099|1098,dv|2038-03-018 +1099|-29|99|297000|29.7|99.3|dv|2039-04-019 01:01:01.111111111|dv|dv|dv|dv:dv|1099,1100|1099,dw|2039-04-019 +1100|-28|100|300000|30.0|100.3|dw|2040-05-020 01:01:01.111111111|dw|dw|dw|dw:dw|1100,1101|1100,dx|2040-05-020 +1101|-27|101|303000|30.3|101.3|dx|2041-06-021 01:01:01.111111111|dx|dx|dx|dx:dx|1101,1102|1101,dy|2041-06-021 +1102|-26|102|306000|30.6|102.3|dy|2042-07-022 01:01:01.111111111|dy|dy|dy|dy:dy|1102,1103|1102,dz|2042-07-022 +1103|-25|103|309000|30.9|103.3|dz|2043-08-023 01:01:01.111111111|dz|dz|dz|dz:dz|1103,1104|1103,ea|2043-08-023 +1104|-24|104|312000|31.2|104.3|ea|2044-09-024 01:01:01.111111111|ea|ea|ea|ea:ea|1104,1105|1104,eb|2044-09-024 +1105|-23|105|315000|31.5|105.3|eb|2045-10-25 01:01:01.111111111|eb|eb|eb|eb:eb|1105,1106|1105,ec|2045-10-25 +1106|-22|106|318000|31.8|106.3|ec|2046-11-26 01:01:01.111111111|ec|ec|ec|ec:ec|1106,1107|1106,ed|2046-11-26 +1107|-21|107|321000|32.1|107.3|ed|2047-12-27 01:01:01.111111111|ed|ed|ed|ed:ed|1107,1108|1107,ee|2047-12-27 +1108|-20|108|324000|32.4|108.3|ee|2048-01-01 01:01:01.111111111|ee|ee|ee|ee:ee|1108,1109|1108,ef|2048-01-01 +1109|-19|109|327000|32.7|109.3|ef|2049-02-02 01:01:01.111111111|ef|ef|ef|ef:ef|1109,1110|1109,eg|2049-02-02 +1110|-18|110|330000|33.0|110.3|eg|2050-03-03 01:01:01.111111111|eg|eg|eg|eg:eg|1110,1111|1110,eh|2050-03-03 +1111|-17|111|333000|33.3|111.3|eh|2051-04-04 01:01:01.111111111|eh|eh|eh|eh:eh|1111,1112|1111,ei|2051-04-04 +1112|-16|112|336000|33.6|112.3|ei|2052-05-05 01:01:01.111111111|ei|ei|ei|ei:ei|1112,1113|1112,ej|2052-05-05 +1113|-15|113|339000|33.9|113.3|ej|2053-06-06 01:01:01.111111111|ej|ej|ej|ej:ej|1113,1114|1113,ek|2053-06-06 +1114|-14|114|342000|34.2|114.3|ek|2054-07-07 01:01:01.111111111|ek|ek|ek|ek:ek|1114,1115|1114,el|2054-07-07 +1115|-13|115|345000|34.5|115.3|el|2055-08-08 01:01:01.111111111|el|el|el|el:el|1115,1116|1115,em|2055-08-08 +1116|-12|116|348000|34.8|116.3|em|2056-09-09 01:01:01.111111111|em|em|em|em:em|1116,1117|1116,en|2056-09-09 +1117|-11|117|351000|35.1|117.3|en|2057-10-10 01:01:01.111111111|en|en|en|en:en|1117,1118|1117,eo|2057-10-10 +1118|-10|118|354000|35.4|118.3|eo|2058-11-11 01:01:01.111111111|eo|eo|eo|eo:eo|1118,1119|1118,ep|2058-11-11 +1119|-9|119|357000|35.7|119.3|ep|2059-12-12 01:01:01.111111111|ep|ep|ep|ep:ep|1119,1120|1119,eq|2059-12-12 +1120|-8|120|360000|36.0|120.3|eq|2060-01-013 01:01:01.111111111|eq|eq|eq|eq:eq|1120,1121|1120,er|2060-01-013 +1121|-7|121|363000|36.3|121.3|er|2061-02-014 01:01:01.111111111|er|er|er|er:er|1121,1122|1121,es|2061-02-014 +1122|-6|122|366000|36.6|122.3|es|2062-03-015 01:01:01.111111111|es|es|es|es:es|1122,1123|1122,et|2062-03-015 +1123|-5|123|369000|36.9|123.3|et|2063-04-016 01:01:01.111111111|et|et|et|et:et|1123,1124|1123,eu|2063-04-016 +1124|-4|124|372000|37.2|124.3|eu|2064-05-017 01:01:01.111111111|eu|eu|eu|eu:eu|1124,1125|1124,ev|2064-05-017 +1125|-3|125|375000|37.5|125.3|ev|2065-06-018 01:01:01.111111111|ev|ev|ev|ev:ev|1125,1126|1125,ew|2065-06-018 +1126|-2|126|378000|37.8|126.3|ew|2066-07-019 01:01:01.111111111|ew|ew|ew|ew:ew|1126,1127|1126,ex|2066-07-019 +1127|-1|127|381000|38.1|127.3|ex|2067-08-020 01:01:01.111111111|ex|ex|ex|ex:ex|1127,1128|1127,ey|2067-08-020 +1128|0|128|384000|38.4|128.3|ey|2068-09-021 01:01:01.111111111|ey|ey|ey|ey:ey|1128,1129|1128,ez|2068-09-021 +1129|1|129|387000|38.7|129.3|ez|2069-10-22 01:01:01.111111111|ez|ez|ez|ez:ez|1129,1130|1129,fa|2069-10-22 +1130|2|130|390000|39.0|130.3|fa|2070-11-23 
01:01:01.111111111|fa|fa|fa|fa:fa|1130,1131|1130,fb|2070-11-23 +1131|3|131|393000|39.3|131.3|fb|2071-12-24 01:01:01.111111111|fb|fb|fb|fb:fb|1131,1132|1131,fc|2071-12-24 +1132|4|132|396000|39.6|132.3|fc|2072-01-025 01:01:01.111111111|fc|fc|fc|fc:fc|1132,1133|1132,fd|2072-01-025 +1133|5|133|399000|39.9|133.3|fd|2073-02-026 01:01:01.111111111|fd|fd|fd|fd:fd|1133,1134|1133,fe|2073-02-026 +1134|6|134|402000|40.2|134.3|fe|2074-03-027 01:01:01.111111111|fe|fe|fe|fe:fe|1134,1135|1134,ff|2074-03-027 +1135|7|135|405000|40.5|135.3|ff|2075-04-01 01:01:01.111111111|ff|ff|ff|ff:ff|1135,1136|1135,fg|2075-04-01 +1136|8|136|408000|40.8|136.3|fg|2076-05-02 01:01:01.111111111|fg|fg|fg|fg:fg|1136,1137|1136,fh|2076-05-02 +1137|9|137|411000|41.1|137.3|fh|2077-06-03 01:01:01.111111111|fh|fh|fh|fh:fh|1137,1138|1137,fi|2077-06-03 +1138|10|138|414000|41.4|138.3|fi|2078-07-04 01:01:01.111111111|fi|fi|fi|fi:fi|1138,1139|1138,fj|2078-07-04 +1139|11|139|417000|41.7|139.3|fj|2079-08-05 01:01:01.111111111|fj|fj|fj|fj:fj|1139,1140|1139,fk|2079-08-05 +1140|12|140|420000|42.0|140.3|fk|2080-09-06 01:01:01.111111111|fk|fk|fk|fk:fk|1140,1141|1140,fl|2080-09-06 +1141|13|141|423000|42.3|141.3|fl|2081-10-7 01:01:01.111111111|fl|fl|fl|fl:fl|1141,1142|1141,fm|2081-10-7 +1142|14|142|426000|42.6|142.3|fm|2082-11-8 01:01:01.111111111|fm|fm|fm|fm:fm|1142,1143|1142,fn|2082-11-8 +1143|15|143|429000|42.9|143.3|fn|2083-12-9 01:01:01.111111111|fn|fn|fn|fn:fn|1143,1144|1143,fo|2083-12-9 +1144|16|144|432000|43.2|144.3|fo|2084-01-010 01:01:01.111111111|fo|fo|fo|fo:fo|1144,1145|1144,fp|2084-01-010 +1145|17|145|435000|43.5|145.3|fp|2085-02-011 01:01:01.111111111|fp|fp|fp|fp:fp|1145,1146|1145,fq|2085-02-011 +1146|18|146|438000|43.8|146.3|fq|2086-03-012 01:01:01.111111111|fq|fq|fq|fq:fq|1146,1147|1146,fr|2086-03-012 +1147|19|147|441000|44.1|147.3|fr|2087-04-013 01:01:01.111111111|fr|fr|fr|fr:fr|1147,1148|1147,fs|2087-04-013 +1148|20|148|444000|44.4|148.3|fs|2088-05-014 01:01:01.111111111|fs|fs|fs|fs:fs|1148,1149|1148,ft|2088-05-014 +1149|21|149|447000|44.7|149.3|ft|2089-06-015 01:01:01.111111111|ft|ft|ft|ft:ft|1149,1150|1149,fu|2089-06-015 +1150|22|150|450000|45.0|150.3|fu|2090-07-016 01:01:01.111111111|fu|fu|fu|fu:fu|1150,1151|1150,fv|2090-07-016 +1151|23|151|453000|45.3|151.3|fv|2091-08-017 01:01:01.111111111|fv|fv|fv|fv:fv|1151,1152|1151,fw|2091-08-017 +1152|24|152|456000|45.6|152.3|fw|2092-09-018 01:01:01.111111111|fw|fw|fw|fw:fw|1152,1153|1152,fx|2092-09-018 +1153|25|153|459000|45.9|153.3|fx|2093-10-19 01:01:01.111111111|fx|fx|fx|fx:fx|1153,1154|1153,fy|2093-10-19 +1154|26|154|462000|46.2|154.3|fy|2094-11-20 01:01:01.111111111|fy|fy|fy|fy:fy|1154,1155|1154,fz|2094-11-20 +1155|27|155|465000|46.5|155.3|fz|2095-12-21 01:01:01.111111111|fz|fz|fz|fz:fz|1155,1156|1155,ga|2095-12-21 +1156|28|156|468000|46.8|156.3|ga|2096-01-022 01:01:01.111111111|ga|ga|ga|ga:ga|1156,1157|1156,gb|2096-01-022 +1157|29|157|471000|47.1|157.3|gb|2097-02-023 01:01:01.111111111|gb|gb|gb|gb:gb|1157,1158|1157,gc|2097-02-023 +1158|30|158|474000|47.4|158.3|gc|2098-03-024 01:01:01.111111111|gc|gc|gc|gc:gc|1158,1159|1158,gd|2098-03-024 +1159|31|159|477000|47.7|159.3|gd|2099-04-025 01:01:01.111111111|gd|gd|gd|gd:gd|1159,1160|1159,ge|2099-04-025 +1160|32|160|480000|48.0|160.3|ge|2100-05-026 01:01:01.111111111|ge|ge|ge|ge:ge|1160,1161|1160,gf|2100-05-026 +1161|33|161|483000|48.3|161.3|gf|2101-06-027 01:01:01.111111111|gf|gf|gf|gf:gf|1161,1162|1161,gg|2101-06-027 +1162|34|162|486000|48.6|162.3|gg|2102-07-01 01:01:01.111111111|gg|gg|gg|gg:gg|1162,1163|1162,gh|2102-07-01 
+1163|35|163|489000|48.9|163.3|gh|2103-08-02 01:01:01.111111111|gh|gh|gh|gh:gh|1163,1164|1163,gi|2103-08-02 +1164|36|164|492000|49.2|164.3|gi|2104-09-03 01:01:01.111111111|gi|gi|gi|gi:gi|1164,1165|1164,gj|2104-09-03 +1165|37|165|495000|49.5|165.3|gj|2105-10-4 01:01:01.111111111|gj|gj|gj|gj:gj|1165,1166|1165,gk|2105-10-4 +1166|38|166|498000|49.8|166.3|gk|2106-11-5 01:01:01.111111111|gk|gk|gk|gk:gk|1166,1167|1166,gl|2106-11-5 +1167|39|167|501000|50.1|167.3|gl|2107-12-6 01:01:01.111111111|gl|gl|gl|gl:gl|1167,1168|1167,gm|2107-12-6 +1168|40|168|504000|50.4|168.3|gm|2108-01-07 01:01:01.111111111|gm|gm|gm|gm:gm|1168,1169|1168,gn|2108-01-07 +1169|41|169|507000|50.7|169.3|gn|2109-02-08 01:01:01.111111111|gn|gn|gn|gn:gn|1169,1170|1169,go|2109-02-08 +1170|42|170|510000|51.0|170.3|go|2110-03-09 01:01:01.111111111|go|go|go|go:go|1170,1171|1170,gp|2110-03-09 +1171|43|171|513000|51.3|171.3|gp|2111-04-010 01:01:01.111111111|gp|gp|gp|gp:gp|1171,1172|1171,gq|2111-04-010 +1172|44|172|516000|51.6|172.3|gq|2112-05-011 01:01:01.111111111|gq|gq|gq|gq:gq|1172,1173|1172,gr|2112-05-011 +1173|45|173|519000|51.9|173.3|gr|2113-06-012 01:01:01.111111111|gr|gr|gr|gr:gr|1173,1174|1173,gs|2113-06-012 +1174|46|174|522000|52.2|174.3|gs|2114-07-013 01:01:01.111111111|gs|gs|gs|gs:gs|1174,1175|1174,gt|2114-07-013 +1175|47|175|525000|52.5|175.3|gt|2115-08-014 01:01:01.111111111|gt|gt|gt|gt:gt|1175,1176|1175,gu|2115-08-014 +1176|48|176|528000|52.8|176.3|gu|2116-09-015 01:01:01.111111111|gu|gu|gu|gu:gu|1176,1177|1176,gv|2116-09-015 +1177|49|177|531000|53.1|177.3|gv|2117-10-16 01:01:01.111111111|gv|gv|gv|gv:gv|1177,1178|1177,gw|2117-10-16 +1178|50|178|534000|53.4|178.3|gw|2118-11-17 01:01:01.111111111|gw|gw|gw|gw:gw|1178,1179|1178,gx|2118-11-17 +1179|51|179|537000|53.7|179.3|gx|2119-12-18 01:01:01.111111111|gx|gx|gx|gx:gx|1179,1180|1179,gy|2119-12-18 +1180|52|180|540000|54.0|180.3|gy|2120-01-019 01:01:01.111111111|gy|gy|gy|gy:gy|1180,1181|1180,gz|2120-01-019 +1181|53|181|543000|54.3|181.3|gz|2121-02-020 01:01:01.111111111|gz|gz|gz|gz:gz|1181,1182|1181,ha|2121-02-020 +1182|54|182|546000|54.6|182.3|ha|2122-03-021 01:01:01.111111111|ha|ha|ha|ha:ha|1182,1183|1182,hb|2122-03-021 +1183|55|183|549000|54.9|183.3|hb|2123-04-022 01:01:01.111111111|hb|hb|hb|hb:hb|1183,1184|1183,hc|2123-04-022 +1184|56|184|552000|55.2|184.3|hc|2124-05-023 01:01:01.111111111|hc|hc|hc|hc:hc|1184,1185|1184,hd|2124-05-023 +1185|57|185|555000|55.5|185.3|hd|2125-06-024 01:01:01.111111111|hd|hd|hd|hd:hd|1185,1186|1185,he|2125-06-024 +1186|58|186|558000|55.8|186.3|he|2126-07-025 01:01:01.111111111|he|he|he|he:he|1186,1187|1186,hf|2126-07-025 +1187|59|187|561000|56.1|187.3|hf|2127-08-026 01:01:01.111111111|hf|hf|hf|hf:hf|1187,1188|1187,hg|2127-08-026 +1188|60|188|564000|56.4|188.3|hg|2128-09-027 01:01:01.111111111|hg|hg|hg|hg:hg|1188,1189|1188,hh|2128-09-027 +1189|61|189|567000|56.7|189.3|hh|2129-10-1 01:01:01.111111111|hh|hh|hh|hh:hh|1189,1190|1189,hi|2129-10-1 +1190|62|190|570000|57.0|190.3|hi|2130-11-2 01:01:01.111111111|hi|hi|hi|hi:hi|1190,1191|1190,hj|2130-11-2 +1191|63|191|573000|57.3|191.3|hj|2131-12-3 01:01:01.111111111|hj|hj|hj|hj:hj|1191,1192|1191,hk|2131-12-3 +1192|64|192|576000|57.6|192.3|hk|2132-01-04 01:01:01.111111111|hk|hk|hk|hk:hk|1192,1193|1192,hl|2132-01-04 +1193|65|193|579000|57.9|193.3|hl|2133-02-05 01:01:01.111111111|hl|hl|hl|hl:hl|1193,1194|1193,hm|2133-02-05 +1194|66|194|582000|58.2|194.3|hm|2134-03-06 01:01:01.111111111|hm|hm|hm|hm:hm|1194,1195|1194,hn|2134-03-06 +1195|67|195|585000|58.5|195.3|hn|2135-04-07 
01:01:01.111111111|hn|hn|hn|hn:hn|1195,1196|1195,ho|2135-04-07 +1196|68|196|588000|58.8|196.3|ho|2136-05-08 01:01:01.111111111|ho|ho|ho|ho:ho|1196,1197|1196,hp|2136-05-08 +1197|69|197|591000|59.1|197.3|hp|2137-06-09 01:01:01.111111111|hp|hp|hp|hp:hp|1197,1198|1197,hq|2137-06-09 +1198|70|198|594000|59.4|198.3|hq|2138-07-010 01:01:01.111111111|hq|hq|hq|hq:hq|1198,1199|1198,hr|2138-07-010 +1199|71|199|597000|59.7|199.3|hr|2139-08-011 01:01:01.111111111|hr|hr|hr|hr:hr|1199,1200|1199,hs|2139-08-011 +1200|72|200|600000|60.0|200.3|hs|2140-09-012 01:01:01.111111111|hs|hs|hs|hs:hs|1200,1201|1200,ht|2140-09-012 +1201|73|201|603000|60.3|201.3|ht|2141-10-13 01:01:01.111111111|ht|ht|ht|ht:ht|1201,1202|1201,hu|2141-10-13 +1202|74|202|606000|60.6|202.3|hu|2142-11-14 01:01:01.111111111|hu|hu|hu|hu:hu|1202,1203|1202,hv|2142-11-14 +1203|75|203|609000|60.9|203.3|hv|2143-12-15 01:01:01.111111111|hv|hv|hv|hv:hv|1203,1204|1203,hw|2143-12-15 +1204|76|204|612000|61.2|204.3|hw|2144-01-016 01:01:01.111111111|hw|hw|hw|hw:hw|1204,1205|1204,hx|2144-01-016 +1205|77|205|615000|61.5|205.3|hx|2145-02-017 01:01:01.111111111|hx|hx|hx|hx:hx|1205,1206|1205,hy|2145-02-017 +1206|78|206|618000|61.8|206.3|hy|2146-03-018 01:01:01.111111111|hy|hy|hy|hy:hy|1206,1207|1206,hz|2146-03-018 +1207|79|207|621000|62.1|207.3|hz|2147-04-019 01:01:01.111111111|hz|hz|hz|hz:hz|1207,1208|1207,ia|2147-04-019 +1208|80|208|624000|62.4|208.3|ia|2148-05-020 01:01:01.111111111|ia|ia|ia|ia:ia|1208,1209|1208,ib|2148-05-020 +1209|81|209|627000|62.7|209.3|ib|2149-06-021 01:01:01.111111111|ib|ib|ib|ib:ib|1209,1210|1209,ic|2149-06-021 +1210|82|210|630000|63.0|210.3|ic|2150-07-022 01:01:01.111111111|ic|ic|ic|ic:ic|1210,1211|1210,id|2150-07-022 +1211|83|211|633000|63.3|211.3|id|2151-08-023 01:01:01.111111111|id|id|id|id:id|1211,1212|1211,ie|2151-08-023 +1212|84|212|636000|63.6|212.3|ie|2152-09-024 01:01:01.111111111|ie|ie|ie|ie:ie|1212,1213|1212,if|2152-09-024 +1213|85|213|639000|63.9|213.3|if|2153-10-25 01:01:01.111111111|if|if|if|if:if|1213,1214|1213,ig|2153-10-25 +1214|86|214|642000|64.2|214.3|ig|2154-11-26 01:01:01.111111111|ig|ig|ig|ig:ig|1214,1215|1214,ih|2154-11-26 +1215|87|215|645000|64.5|215.3|ih|2155-12-27 01:01:01.111111111|ih|ih|ih|ih:ih|1215,1216|1215,ii|2155-12-27 +1216|88|216|648000|64.8|216.3|ii|2156-01-01 01:01:01.111111111|ii|ii|ii|ii:ii|1216,1217|1216,ij|2156-01-01 +1217|89|217|651000|65.1|217.3|ij|2157-02-02 01:01:01.111111111|ij|ij|ij|ij:ij|1217,1218|1217,ik|2157-02-02 +1218|90|218|654000|65.4|218.3|ik|2158-03-03 01:01:01.111111111|ik|ik|ik|ik:ik|1218,1219|1218,il|2158-03-03 +1219|91|219|657000|65.7|219.3|il|2159-04-04 01:01:01.111111111|il|il|il|il:il|1219,1220|1219,im|2159-04-04 +1220|92|220|660000|66.0|220.3|im|2160-05-05 01:01:01.111111111|im|im|im|im:im|1220,1221|1220,in|2160-05-05 +1221|93|221|663000|66.3|221.3|in|2161-06-06 01:01:01.111111111|in|in|in|in:in|1221,1222|1221,io|2161-06-06 +1222|94|222|666000|66.6|222.3|io|2162-07-07 01:01:01.111111111|io|io|io|io:io|1222,1223|1222,ip|2162-07-07 +1223|95|223|669000|66.9|223.3|ip|2163-08-08 01:01:01.111111111|ip|ip|ip|ip:ip|1223,1224|1223,iq|2163-08-08 +1224|96|224|672000|67.2|224.3|iq|2164-09-09 01:01:01.111111111|iq|iq|iq|iq:iq|1224,1225|1224,ir|2164-09-09 +1225|97|225|675000|67.5|225.3|ir|2165-10-10 01:01:01.111111111|ir|ir|ir|ir:ir|1225,1226|1225,is|2165-10-10 +1226|98|226|678000|67.8|226.3|is|2166-11-11 01:01:01.111111111|is|is|is|is:is|1226,1227|1226,it|2166-11-11 +1227|99|227|681000|68.1|227.3|it|2167-12-12 01:01:01.111111111|it|it|it|it:it|1227,1228|1227,iu|2167-12-12 
+1228|100|228|684000|68.4|228.3|iu|2168-01-013 01:01:01.111111111|iu|iu|iu|iu:iu|1228,1229|1228,iv|2168-01-013 +1229|101|229|687000|68.7|229.3|iv|2169-02-014 01:01:01.111111111|iv|iv|iv|iv:iv|1229,1230|1229,iw|2169-02-014 +1230|102|230|690000|69.0|230.3|iw|2170-03-015 01:01:01.111111111|iw|iw|iw|iw:iw|1230,1231|1230,ix|2170-03-015 +1231|103|231|693000|69.3|231.3|ix|2171-04-016 01:01:01.111111111|ix|ix|ix|ix:ix|1231,1232|1231,iy|2171-04-016 +1232|104|232|696000|69.6|232.3|iy|2172-05-017 01:01:01.111111111|iy|iy|iy|iy:iy|1232,1233|1232,iz|2172-05-017 +1233|105|233|699000|69.9|233.3|iz|2173-06-018 01:01:01.111111111|iz|iz|iz|iz:iz|1233,1234|1233,ja|2173-06-018 +1234|106|234|702000|70.2|234.3|ja|2174-07-019 01:01:01.111111111|ja|ja|ja|ja:ja|1234,1235|1234,jb|2174-07-019 +1235|107|235|705000|70.5|235.3|jb|2175-08-020 01:01:01.111111111|jb|jb|jb|jb:jb|1235,1236|1235,jc|2175-08-020 +1236|108|236|708000|70.8|236.3|jc|2176-09-021 01:01:01.111111111|jc|jc|jc|jc:jc|1236,1237|1236,jd|2176-09-021 +1237|109|237|711000|71.1|237.3|jd|2177-10-22 01:01:01.111111111|jd|jd|jd|jd:jd|1237,1238|1237,je|2177-10-22 +1238|110|238|714000|71.4|238.3|je|2178-11-23 01:01:01.111111111|je|je|je|je:je|1238,1239|1238,jf|2178-11-23 +1239|111|239|717000|71.7|239.3|jf|2179-12-24 01:01:01.111111111|jf|jf|jf|jf:jf|1239,1240|1239,jg|2179-12-24 +1240|112|240|720000|72.0|240.3|jg|2180-01-025 01:01:01.111111111|jg|jg|jg|jg:jg|1240,1241|1240,jh|2180-01-025 +1241|113|241|723000|72.3|241.3|jh|2181-02-026 01:01:01.111111111|jh|jh|jh|jh:jh|1241,1242|1241,ji|2181-02-026 +1242|114|242|726000|72.6|242.3|ji|2182-03-027 01:01:01.111111111|ji|ji|ji|ji:ji|1242,1243|1242,jj|2182-03-027 +1243|115|243|729000|72.9|243.3|jj|2183-04-01 01:01:01.111111111|jj|jj|jj|jj:jj|1243,1244|1243,jk|2183-04-01 +1244|116|244|732000|73.2|244.3|jk|2184-05-02 01:01:01.111111111|jk|jk|jk|jk:jk|1244,1245|1244,jl|2184-05-02 +1245|117|245|735000|73.5|245.3|jl|2185-06-03 01:01:01.111111111|jl|jl|jl|jl:jl|1245,1246|1245,jm|2185-06-03 +1246|118|246|738000|73.8|246.3|jm|2186-07-04 01:01:01.111111111|jm|jm|jm|jm:jm|1246,1247|1246,jn|2186-07-04 +1247|119|247|741000|74.1|247.3|jn|2187-08-05 01:01:01.111111111|jn|jn|jn|jn:jn|1247,1248|1247,jo|2187-08-05 +1248|120|248|744000|74.4|248.3|jo|2188-09-06 01:01:01.111111111|jo|jo|jo|jo:jo|1248,1249|1248,jp|2188-09-06 +1249|121|249|747000|74.7|249.3|jp|2189-10-7 01:01:01.111111111|jp|jp|jp|jp:jp|1249,1250|1249,jq|2189-10-7 +1250|122|250|750000|75.0|250.3|jq|2190-11-8 01:01:01.111111111|jq|jq|jq|jq:jq|1250,1251|1250,jr|2190-11-8 +1251|123|251|753000|75.3|251.3|jr|2191-12-9 01:01:01.111111111|jr|jr|jr|jr:jr|1251,1252|1251,js|2191-12-9 +1252|124|252|756000|75.6|252.3|js|2192-01-010 01:01:01.111111111|js|js|js|js:js|1252,1253|1252,jt|2192-01-010 +1253|125|253|759000|75.9|253.3|jt|2193-02-011 01:01:01.111111111|jt|jt|jt|jt:jt|1253,1254|1253,ju|2193-02-011 +1254|126|254|762000|76.2|254.3|ju|2194-03-012 01:01:01.111111111|ju|ju|ju|ju:ju|1254,1255|1254,jv|2194-03-012 +1255|127|255|765000|76.5|255.3|jv|2195-04-013 01:01:01.111111111|jv|jv|jv|jv:jv|1255,1256|1255,jw|2195-04-013 +1256|-128|256|768000|76.8|256.3|jw|2196-05-014 01:01:01.111111111|jw|jw|jw|jw:jw|1256,1257|1256,jx|2196-05-014 +1257|-127|257|771000|77.1|257.3|jx|2197-06-015 01:01:01.111111111|jx|jx|jx|jx:jx|1257,1258|1257,jy|2197-06-015 +1258|-126|258|774000|77.4|258.3|jy|2198-07-016 01:01:01.111111111|jy|jy|jy|jy:jy|1258,1259|1258,jz|2198-07-016 +1259|-125|259|777000|77.7|259.3|jz|2199-08-017 01:01:01.111111111|jz|jz|jz|jz:jz|1259,1260|1259,ka|2199-08-017 
+1260|-124|260|780000|78.0|260.3|ka|2200-09-018 01:01:01.111111111|ka|ka|ka|ka:ka|1260,1261|1260,kb|2200-09-018 +1261|-123|261|783000|78.3|261.3|kb|2201-10-19 01:01:01.111111111|kb|kb|kb|kb:kb|1261,1262|1261,kc|2201-10-19 +1262|-122|262|786000|78.6|262.3|kc|2202-11-20 01:01:01.111111111|kc|kc|kc|kc:kc|1262,1263|1262,kd|2202-11-20 +1263|-121|263|789000|78.9|263.3|kd|2203-12-21 01:01:01.111111111|kd|kd|kd|kd:kd|1263,1264|1263,ke|2203-12-21 +1264|-120|264|792000|79.2|264.3|ke|2204-01-022 01:01:01.111111111|ke|ke|ke|ke:ke|1264,1265|1264,kf|2204-01-022 +1265|-119|265|795000|79.5|265.3|kf|2205-02-023 01:01:01.111111111|kf|kf|kf|kf:kf|1265,1266|1265,kg|2205-02-023 +1266|-118|266|798000|79.8|266.3|kg|2206-03-024 01:01:01.111111111|kg|kg|kg|kg:kg|1266,1267|1266,kh|2206-03-024 +1267|-117|267|801000|80.1|267.3|kh|2207-04-025 01:01:01.111111111|kh|kh|kh|kh:kh|1267,1268|1267,ki|2207-04-025 +1268|-116|268|804000|80.4|268.3|ki|2208-05-026 01:01:01.111111111|ki|ki|ki|ki:ki|1268,1269|1268,kj|2208-05-026 +1269|-115|269|807000|80.7|269.3|kj|2209-06-027 01:01:01.111111111|kj|kj|kj|kj:kj|1269,1270|1269,kk|2209-06-027 +1270|-114|270|810000|81.0|270.3|kk|2210-07-01 01:01:01.111111111|kk|kk|kk|kk:kk|1270,1271|1270,kl|2210-07-01 +1271|-113|271|813000|81.3|271.3|kl|2211-08-02 01:01:01.111111111|kl|kl|kl|kl:kl|1271,1272|1271,km|2211-08-02 +1272|-112|272|816000|81.6|272.3|km|2212-09-03 01:01:01.111111111|km|km|km|km:km|1272,1273|1272,kn|2212-09-03 +1273|-111|273|819000|81.9|273.3|kn|2213-10-4 01:01:01.111111111|kn|kn|kn|kn:kn|1273,1274|1273,ko|2213-10-4 +1274|-110|274|822000|82.2|274.3|ko|2214-11-5 01:01:01.111111111|ko|ko|ko|ko:ko|1274,1275|1274,kp|2214-11-5 +1275|-109|275|825000|82.5|275.3|kp|2215-12-6 01:01:01.111111111|kp|kp|kp|kp:kp|1275,1276|1275,kq|2215-12-6 +1276|-108|276|828000|82.8|276.3|kq|2216-01-07 01:01:01.111111111|kq|kq|kq|kq:kq|1276,1277|1276,kr|2216-01-07 +1277|-107|277|831000|83.1|277.3|kr|2217-02-08 01:01:01.111111111|kr|kr|kr|kr:kr|1277,1278|1277,ks|2217-02-08 +1278|-106|278|834000|83.4|278.3|ks|2218-03-09 01:01:01.111111111|ks|ks|ks|ks:ks|1278,1279|1278,kt|2218-03-09 +1279|-105|279|837000|83.7|279.3|kt|2219-04-010 01:01:01.111111111|kt|kt|kt|kt:kt|1279,1280|1279,ku|2219-04-010 +1280|-104|280|840000|84.0|280.3|ku|2220-05-011 01:01:01.111111111|ku|ku|ku|ku:ku|1280,1281|1280,kv|2220-05-011 +1281|-103|281|843000|84.3|281.3|kv|2221-06-012 01:01:01.111111111|kv|kv|kv|kv:kv|1281,1282|1281,kw|2221-06-012 +1282|-102|282|846000|84.6|282.3|kw|2222-07-013 01:01:01.111111111|kw|kw|kw|kw:kw|1282,1283|1282,kx|2222-07-013 +1283|-101|283|849000|84.9|283.3|kx|2223-08-014 01:01:01.111111111|kx|kx|kx|kx:kx|1283,1284|1283,ky|2223-08-014 +1284|-100|284|852000|85.2|284.3|ky|2224-09-015 01:01:01.111111111|ky|ky|ky|ky:ky|1284,1285|1284,kz|2224-09-015 +1285|-99|285|855000|85.5|285.3|kz|2225-10-16 01:01:01.111111111|kz|kz|kz|kz:kz|1285,1286|1285,la|2225-10-16 +1286|-98|286|858000|85.8|286.3|la|2226-11-17 01:01:01.111111111|la|la|la|la:la|1286,1287|1286,lb|2226-11-17 +1287|-97|287|861000|86.1|287.3|lb|2227-12-18 01:01:01.111111111|lb|lb|lb|lb:lb|1287,1288|1287,lc|2227-12-18 +1288|-96|288|864000|86.4|288.3|lc|2228-01-019 01:01:01.111111111|lc|lc|lc|lc:lc|1288,1289|1288,ld|2228-01-019 +1289|-95|289|867000|86.7|289.3|ld|2229-02-020 01:01:01.111111111|ld|ld|ld|ld:ld|1289,1290|1289,le|2229-02-020 +1290|-94|290|870000|87.0|290.3|le|2230-03-021 01:01:01.111111111|le|le|le|le:le|1290,1291|1290,lf|2230-03-021 +1291|-93|291|873000|87.3|291.3|lf|2231-04-022 01:01:01.111111111|lf|lf|lf|lf:lf|1291,1292|1291,lg|2231-04-022 
+1292|-92|292|876000|87.6|292.3|lg|2232-05-023 01:01:01.111111111|lg|lg|lg|lg:lg|1292,1293|1292,lh|2232-05-023
+1293|-91|293|879000|87.9|293.3|lh|2233-06-024 01:01:01.111111111|lh|lh|lh|lh:lh|1293,1294|1293,li|2233-06-024
+1294|-90|294|882000|88.2|294.3|li|2234-07-025 01:01:01.111111111|li|li|li|li:li|1294,1295|1294,lj|2234-07-025
+1295|-89|295|885000|88.5|295.3|lj|2235-08-026 01:01:01.111111111|lj|lj|lj|lj:lj|1295,1296|1295,lk|2235-08-026
+1296|-88|296|888000|88.8|296.3|lk|2236-09-027 01:01:01.111111111|lk|lk|lk|lk:lk|1296,1297|1296,ll|2236-09-027
+1297|-87|297|891000|89.1|297.3|ll|2237-10-1 01:01:01.111111111|ll|ll|ll|ll:ll|1297,1298|1297,lm|2237-10-1
+1298|-86|298|894000|89.4|298.3|lm|2238-11-2 01:01:01.111111111|lm|lm|lm|lm:lm|1298,1299|1298,ln|2238-11-2
+1299|-85|299|897000|89.7|299.3|ln|2239-12-3 01:01:01.111111111|ln|ln|ln|ln:ln|1299,1300|1299,lo|2239-12-3
\ No newline at end of file
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
index 5b65e5c..f4fadbb 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/MapredParquetInputFormat.java
@@ -14,6 +14,8 @@
 package org.apache.hadoop.hive.ql.io.parquet;
 
 import java.io.IOException;
+
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.hive.ql.exec.Utilities;
@@ -34,7 +36,8 @@
  * NOTE: With HIVE-9235 we removed "implements VectorizedParquetInputFormat" since all data types
  * are not currently supported. Removing the interface turns off vectorization.
  */
-public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable> {
+public class MapredParquetInputFormat extends FileInputFormat<NullWritable, ArrayWritable>
+  implements VectorizedInputFormatInterface {
 
   private static final Logger LOG = LoggerFactory.getLogger(MapredParquetInputFormat.class);
 
@@ -48,7 +51,7 @@ public MapredParquetInputFormat() {
 
   protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> inputFormat) {
     this.realInput = inputFormat;
-    vectorizedSelf = new VectorizedParquetInputFormat(inputFormat);
+    vectorizedSelf = new VectorizedParquetInputFormat();
   }
 
   @SuppressWarnings({ "unchecked", "rawtypes" })
@@ -69,8 +72,7 @@ protected MapredParquetInputFormat(final ParquetInputFormat<ArrayWritable> input
         if (LOG.isDebugEnabled()) {
           LOG.debug("Using row-mode record reader");
         }
-        return (RecordReader<NullWritable, ArrayWritable>)
-          new ParquetRecordReaderWrapper(realInput, split, job, reporter);
+        return new ParquetRecordReaderWrapper(realInput, split, job, reporter);
       }
     } catch (final InterruptedException e) {
       throw new RuntimeException("Cannot create a RecordReaderWrapper", e);
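The hunk above re-advertises vectorization for Parquet by implementing VectorizedInputFormatInterface, and MapredParquetInputFormat now keeps two delegates: the row-mode ParquetRecordReaderWrapper and a vectorized input format (vectorizedSelf) that no longer wraps the real input. A minimal, self-contained sketch of that dispatch pattern; the flag name example.use.vectorized is hypothetical and merely stands in for the decision Hive derives from the query plan:

import java.util.Properties;

public class DispatchSketch {
  interface Reader { String describe(); }

  static class RowModeReader implements Reader {
    public String describe() { return "one record at a time"; }
  }

  static class VectorizedReader implements Reader {
    public String describe() { return "one batch of rows at a time"; }
  }

  // Mirrors the idea of getRecordReader(): pick the vectorized delegate when
  // the job asks for it, otherwise fall back to the row-mode wrapper.
  static Reader getReader(Properties conf) {
    boolean vectorized = Boolean.parseBoolean(conf.getProperty("example.use.vectorized", "false"));
    return vectorized ? new VectorizedReader() : new RowModeReader();
  }

  public static void main(String[] args) {
    Properties conf = new Properties();
    conf.setProperty("example.use.vectorized", "true");
    System.out.println(getReader(conf).describe()); // one batch of rows at a time
  }
}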
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
new file mode 100644
index 0000000..167f9b6
--- /dev/null
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/ParquetRecordReaderBase.java
@@ -0,0 +1,171 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.io.parquet;
+
+import com.google.common.base.Strings;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.ql.io.parquet.read.ParquetFilterPredicateConverter;
+import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
+import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.mapred.FileSplit;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.filter2.compat.RowGroupFilter;
+import org.apache.parquet.filter2.predicate.FilterPredicate;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.ParquetInputFormat;
+import org.apache.parquet.hadoop.ParquetInputSplit;
+import org.apache.parquet.hadoop.api.InitContext;
+import org.apache.parquet.hadoop.api.ReadSupport;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.FileMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.MessageTypeParser;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+public class ParquetRecordReaderBase {
+  public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordReaderBase.class);
+
+  protected Path file;
+  protected ProjectionPusher projectionPusher;
+  protected boolean skipTimestampConversion = false;
+  protected SerDeStats serDeStats;
+  protected JobConf jobConf;
+
+  protected int schemaSize;
+  protected List<BlockMetaData> filtedBlocks;
+  protected ParquetFileReader reader;
+
+  /**
+   * Gets a ParquetInputSplit corresponding to a split given by Hive.
+   *
+   * @param oldSplit The split given by Hive
+   * @param conf The JobConf of the Hive job
+   * @return a ParquetInputSplit corresponding to the oldSplit
+   * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
+   */
+  @SuppressWarnings("deprecation")
+  protected ParquetInputSplit getSplit(
+    final org.apache.hadoop.mapred.InputSplit oldSplit,
+    final JobConf conf
+  ) throws IOException {
+    ParquetInputSplit split;
+    if (oldSplit instanceof FileSplit) {
+      final Path finalPath = ((FileSplit) oldSplit).getPath();
+      jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
+
+      // TODO enable MetadataFilter by using readFooter(Configuration configuration, Path file,
+      //  MetadataFilter filter) API
+      final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
+      final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
+      final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
+
+      final ReadSupport.ReadContext
+        readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
+        null, fileMetaData.getSchema()));
+
+      // Compute stats
blocks) { + serDeStats.setRowCount(serDeStats.getRowCount() + bmd.getRowCount()); + serDeStats.setRawDataSize(serDeStats.getRawDataSize() + bmd.getTotalByteSize()); + } + + schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata() + .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount(); + final List splitGroup = new ArrayList(); + final long splitStart = ((FileSplit) oldSplit).getStart(); + final long splitLength = ((FileSplit) oldSplit).getLength(); + for (final BlockMetaData block : blocks) { + final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset(); + if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) { + splitGroup.add(block); + } + } + if (splitGroup.isEmpty()) { + LOG.warn("Skipping split, could not find row group in: " + oldSplit); + return null; + } + + FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema()); + if (filter != null) { + filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema()); + if (filtedBlocks.isEmpty()) { + LOG.debug("All row groups are dropped due to filter predicates"); + return null; + } + + long droppedBlocks = splitGroup.size() - filtedBlocks.size(); + if (droppedBlocks > 0) { + LOG.debug("Dropping " + droppedBlocks + " row groups that do not pass filter predicate"); + } + } else { + filtedBlocks = splitGroup; + } + + if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) { + skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr"); + } + split = new ParquetInputSplit(finalPath, + splitStart, + splitLength, + oldSplit.getLocations(), + filtedBlocks, + readContext.getRequestedSchema().toString(), + fileMetaData.getSchema().toString(), + fileMetaData.getKeyValueMetaData(), + readContext.getReadSupportMetadata()); + return split; + } else { + throw new IllegalArgumentException("Unknown split type: " + oldSplit); + } + } + + public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) { + SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf); + if (sarg == null) { + return null; + } + + // Create the Parquet FilterPredicate without including columns that do not exist + // on the schema (such as partition columns). + FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema); + if (p != null) { + // Filter may have sensitive information. Do not send to debug. + LOG.debug("PARQUET predicate push down generated."); + ParquetInputFormat.setFilterPredicate(conf, p); + return FilterCompat.get(p); + } else { + // Filter may have sensitive information. Do not send to debug. 
+ LOG.debug("No PARQUET predicate push down is generated."); + return null; + } + } + + public List getFiltedBlocks() { + return filtedBlocks; + } + + public SerDeStats getStats() { + return serDeStats; + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java index 2072533..322178a 100644 --- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java @@ -15,147 +15,29 @@ import java.io.IOException; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; -import org.apache.hadoop.hive.ql.exec.Utilities; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign; -import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface; +import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader; +import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; -import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; -import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper; -import org.apache.hadoop.hive.ql.metadata.HiveException; -import org.apache.hadoop.hive.ql.plan.MapWork; -import org.apache.hadoop.io.ArrayWritable; import org.apache.hadoop.io.NullWritable; -import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.FileInputFormat; -import org.apache.hadoop.mapred.FileSplit; -import org.apache.hadoop.mapred.InputSplit; import org.apache.hadoop.mapred.JobConf; import org.apache.hadoop.mapred.RecordReader; import org.apache.hadoop.mapred.Reporter; -import org.apache.parquet.hadoop.ParquetInputFormat; /** * Vectorized input format for Parquet files */ -public class VectorizedParquetInputFormat extends FileInputFormat - implements VectorizedInputFormatInterface { - - private static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetInputFormat.class); - - /** - * Vectorized record reader for vectorized Parquet input format - */ - private static class VectorizedParquetRecordReader implements - RecordReader { - private static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetRecordReader.class); - - private final ParquetRecordReaderWrapper internalReader; - private VectorizedRowBatchCtx rbCtx; - private Object[] partitionValues; - private ArrayWritable internalValues; - private NullWritable internalKey; - private VectorColumnAssign[] assigners; - - public VectorizedParquetRecordReader( - ParquetInputFormat realInput, - FileSplit split, - JobConf conf, Reporter reporter) throws IOException, InterruptedException { - internalReader = new ParquetRecordReaderWrapper( - realInput, - split, - conf, - reporter); - rbCtx = Utilities.getVectorizedRowBatchCtx(conf); - int partitionColumnCount = rbCtx.getPartitionColumnCount(); - if (partitionColumnCount > 0) { - partitionValues = new Object[partitionColumnCount]; - rbCtx.getPartitionValues(rbCtx, conf, split, partitionValues); - } - } - - @Override - public NullWritable createKey() { - internalKey = internalReader.createKey(); - return NullWritable.get(); - } - - @Override - public VectorizedRowBatch createValue() { - VectorizedRowBatch outputBatch; - outputBatch = rbCtx.createVectorizedRowBatch(); - internalValues = internalReader.createValue(); - return outputBatch; - } - - @Override - public long getPos() 
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
index 2072533..322178a 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/VectorizedParquetInputFormat.java
@@ -15,147 +15,29 @@
 
 import java.io.IOException;
 
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import org.apache.hadoop.hive.ql.exec.Utilities;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssign;
-import org.apache.hadoop.hive.ql.exec.vector.VectorColumnAssignFactory;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedInputFormatInterface;
+import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader;
+import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
-import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
-import org.apache.hadoop.hive.ql.io.parquet.read.ParquetRecordReaderWrapper;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.plan.MapWork;
-import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Writable;
 import org.apache.hadoop.mapred.FileInputFormat;
-import org.apache.hadoop.mapred.FileSplit;
-import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
 import org.apache.hadoop.mapred.Reporter;
-import org.apache.parquet.hadoop.ParquetInputFormat;
 
 /**
  * Vectorized input format for Parquet files
  */
-public class VectorizedParquetInputFormat extends FileInputFormat<NullWritable, VectorizedRowBatch>
-  implements VectorizedInputFormatInterface {
-
-  private static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetInputFormat.class);
-
-  /**
-   * Vectorized record reader for vectorized Parquet input format
-   */
-  private static class VectorizedParquetRecordReader implements
-      RecordReader<NullWritable, VectorizedRowBatch> {
-    private static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetRecordReader.class);
-
-    private final ParquetRecordReaderWrapper internalReader;
-    private VectorizedRowBatchCtx rbCtx;
-    private Object[] partitionValues;
-    private ArrayWritable internalValues;
-    private NullWritable internalKey;
-    private VectorColumnAssign[] assigners;
-
-    public VectorizedParquetRecordReader(
-        ParquetInputFormat<ArrayWritable> realInput,
-        FileSplit split,
-        JobConf conf, Reporter reporter) throws IOException, InterruptedException {
-      internalReader = new ParquetRecordReaderWrapper(
-        realInput,
-        split,
-        conf,
-        reporter);
-      rbCtx = Utilities.getVectorizedRowBatchCtx(conf);
-      int partitionColumnCount = rbCtx.getPartitionColumnCount();
-      if (partitionColumnCount > 0) {
-        partitionValues = new Object[partitionColumnCount];
-        rbCtx.getPartitionValues(rbCtx, conf, split, partitionValues);
-      }
-    }
-
-    @Override
-    public NullWritable createKey() {
-      internalKey = internalReader.createKey();
-      return NullWritable.get();
-    }
-
-    @Override
-    public VectorizedRowBatch createValue() {
-      VectorizedRowBatch outputBatch;
-      outputBatch = rbCtx.createVectorizedRowBatch();
-      internalValues = internalReader.createValue();
-      return outputBatch;
-    }
-
-    @Override
-    public long getPos() throws IOException {
-      return internalReader.getPos();
-    }
+public class VectorizedParquetInputFormat
+  extends FileInputFormat<NullWritable, VectorizedRowBatch> {
 
-    @Override
-    public void close() throws IOException {
-      internalReader.close();
-    }
-
-    @Override
-    public float getProgress() throws IOException {
-      return internalReader.getProgress();
-    }
-
-    @Override
-    public boolean next(NullWritable key, VectorizedRowBatch outputBatch)
-        throws IOException {
-      if (assigners != null) {
-        assert(outputBatch.numCols == assigners.length);
-      }
-      outputBatch.reset();
-      int maxSize = outputBatch.getMaxSize();
-      try {
-        while (outputBatch.size < maxSize) {
-          if (false == internalReader.next(internalKey, internalValues)) {
-            outputBatch.endOfFile = true;
-            break;
-          }
-          Writable[] writables = internalValues.get();
-
-          if (null == assigners) {
-            // Normally we'd build the assigners from the rbCtx.rowOI, but with Parquet
-            // we have a discrepancy between the metadata type (Eg. tinyint -> BYTE) and
-            // the writable value (IntWritable). see Parquet's ETypeConverter class.
-            assigners = VectorColumnAssignFactory.buildAssigners(outputBatch, writables);
-          }
-
-          for(int i = 0; i < writables.length; ++i) {
-            assigners[i].assignObjectValue(writables[i], outputBatch.size);
-          }
-          ++outputBatch.size;
-        }
-      } catch (HiveException e) {
-        throw new RuntimeException(e);
-      }
-      return outputBatch.size > 0;
-    }
+  public VectorizedParquetInputFormat() {
   }
 
-  private final ParquetInputFormat<ArrayWritable> realInput;
-
-  public VectorizedParquetInputFormat(ParquetInputFormat<ArrayWritable> realInput) {
-    this.realInput = realInput;
-  }
-
-  @SuppressWarnings("unchecked")
   @Override
   public RecordReader<NullWritable, VectorizedRowBatch> getRecordReader(
-      InputSplit split, JobConf conf, Reporter reporter) throws IOException {
-    try {
-      return (RecordReader)
-        new VectorizedParquetRecordReader(realInput, (FileSplit) split, conf, reporter);
-    } catch (final InterruptedException e) {
-      throw new RuntimeException("Cannot create a VectorizedParquetRecordReader", e);
-    }
+    InputSplit inputSplit,
+    JobConf jobConf,
+    Reporter reporter) throws IOException {
+    return new VectorizedParquetRecordReader(inputSplit, jobConf);
   }
-
 }
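The deleted inner reader filled a VectorizedRowBatch by pulling rows from the row-mode reader until the batch was full or input ended; the rewritten class instead hands the split directly to the new VectorizedParquetRecordReader. A stripped-down sketch of the old batch-filling loop, with a plain int array standing in for VectorizedRowBatch (whose default size in Hive is 1024):

import java.util.Arrays;
import java.util.Iterator;
import java.util.List;

public class BatchFillSketch {
  static final int MAX_BATCH = 3; // tiny batch so the example shows a refill

  // Copies up to MAX_BATCH rows into batch; sets endOfFile[0] when input ends.
  static int nextBatch(Iterator<Integer> rows, int[] batch, boolean[] endOfFile) {
    int size = 0;
    while (size < MAX_BATCH) {
      if (!rows.hasNext()) {
        endOfFile[0] = true;
        break;
      }
      batch[size++] = rows.next();
    }
    return size;
  }

  public static void main(String[] args) {
    Iterator<Integer> rows = List.of(1, 2, 3, 4).iterator();
    int[] batch = new int[MAX_BATCH];
    boolean[] eof = new boolean[1];
    while (true) {
      int n = nextBatch(rows, batch, eof);
      System.out.println(Arrays.toString(Arrays.copyOf(batch, n))); // [1, 2, 3] then [4]
      if (eof[0]) break;
    }
  }
}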
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
index 8d8b0c5..16064b2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/DataWritableReadSupport.java
@@ -70,7 +70,7 @@
    * @param columns comma separated list of columns
    * @return list with virtual columns removed
    */
-  private static List<String> getColumnNames(final String columns) {
+  public static List<String> getColumnNames(final String columns) {
     return (List<String>) VirtualColumn.
       removeVirtualColumns(StringUtils.getStringCollection(columns));
   }
@@ -82,7 +82,7 @@
    * @param types Comma separated list of types
    * @return A list of TypeInfo objects.
    */
-  private static List<TypeInfo> getColumnTypes(final String types) {
+  public static List<TypeInfo> getColumnTypes(final String types) {
     return TypeInfoUtils.getTypeInfosFromTypeString(types);
   }
 
@@ -177,7 +177,7 @@ private static Type getProjectedType(TypeInfo colType, Type fieldType) {
    * @param colTypes List of column types.
    * @return A MessageType object of projected columns.
    */
-  private static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
+  public static MessageType getSchemaByName(MessageType schema, List<String> colNames, List<TypeInfo> colTypes) {
     List<Type> projectedFields = getProjectedGroupFields(schema, colNames, colTypes);
     Type[] typesArray = projectedFields.toArray(new Type[0]);
 
@@ -195,7 +195,7 @@ private static MessageType getSchemaByName(MessageType schema, List<String> colN
    * @param colIndexes List of column indexes.
    * @return A MessageType object of the column names found.
    */
-  private static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) {
+  public static MessageType getSchemaByIndex(MessageType schema, List<String> colNames, List<Integer> colIndexes) {
     List<Type> schemaTypes = new ArrayList<Type>();
 
     for (Integer i : colIndexes) {
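getColumnNames() and getColumnTypes() become public so the vectorized reader can reuse them when it rebuilds the projected schema; per the javadoc above, the first splits Hive's comma-separated column list and strips virtual columns. A rough sketch of that behavior, using only a small illustrative subset of Hive's virtual columns:

import java.util.Arrays;
import java.util.List;
import java.util.Set;
import java.util.stream.Collectors;

public class ColumnStringSketch {
  // Illustrative subset; Hive defines more virtual columns than these two.
  static final Set<String> VIRTUAL =
      Set.of("INPUT__FILE__NAME", "BLOCK__OFFSET__INSIDE__FILE");

  static List<String> getColumnNames(String columns) {
    return Arrays.stream(columns.split(","))
        .filter(c -> !VIRTUAL.contains(c))
        .collect(Collectors.toList());
  }

  public static void main(String[] args) {
    // Two real columns plus one virtual column requested by the query.
    System.out.println(getColumnNames("cint,cstring1,INPUT__FILE__NAME"));
    // -> [cint, cstring1]
  }
}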
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
index d2e1b13..ac430a6 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/read/ParquetRecordReaderWrapper.java
@@ -14,24 +14,19 @@
 package org.apache.hadoop.hive.ql.io.parquet.read;
 
 import java.io.IOException;
-import java.util.ArrayList;
-import java.util.List;
 
+import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.hive.conf.HiveConf;
 import org.apache.hadoop.hive.ql.io.IOConstants;
 import org.apache.hadoop.hive.ql.io.StatsProvidingRecordReader;
 import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
-import org.apache.hadoop.hive.ql.io.sarg.ConvertAstToSearchArg;
-import org.apache.hadoop.hive.ql.io.sarg.SearchArgument;
 import org.apache.hadoop.hive.serde2.SerDeStats;
 import org.apache.hadoop.io.ArrayWritable;
 import org.apache.hadoop.io.NullWritable;
 import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.mapred.FileSplit;
 import org.apache.hadoop.mapred.InputSplit;
 import org.apache.hadoop.mapred.JobConf;
 import org.apache.hadoop.mapred.RecordReader;
@@ -39,25 +34,12 @@
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 import org.apache.hadoop.mapreduce.TaskAttemptID;
-import org.apache.parquet.filter2.compat.FilterCompat;
-import org.apache.parquet.filter2.compat.RowGroupFilter;
-import org.apache.parquet.filter2.predicate.FilterPredicate;
-import org.apache.parquet.hadoop.ParquetFileReader;
 import org.apache.parquet.hadoop.ParquetInputFormat;
 import org.apache.parquet.hadoop.ParquetInputSplit;
-import org.apache.parquet.hadoop.api.InitContext;
-import org.apache.parquet.hadoop.api.ReadSupport.ReadContext;
-import org.apache.parquet.hadoop.metadata.BlockMetaData;
-import org.apache.parquet.hadoop.metadata.FileMetaData;
-import org.apache.parquet.hadoop.metadata.ParquetMetadata;
 import org.apache.parquet.hadoop.util.ContextUtil;
-import org.apache.parquet.schema.MessageType;
-import org.apache.parquet.schema.MessageTypeParser;
 
-import com.google.common.base.Strings;
-
-public class ParquetRecordReaderWrapper implements RecordReader<NullWritable, ArrayWritable>,
-    StatsProvidingRecordReader {
+public class ParquetRecordReaderWrapper extends ParquetRecordReaderBase
+  implements RecordReader<NullWritable, ArrayWritable>, StatsProvidingRecordReader {
 
   public static final Logger LOG = LoggerFactory.getLogger(ParquetRecordReaderWrapper.class);
 
   private final long splitLen; // for getPos()
@@ -68,12 +50,6 @@
   private ArrayWritable valueObj = null;
   private boolean firstRecord = false;
   private boolean eof = false;
-  private int schemaSize;
-  private boolean skipTimestampConversion = false;
-  private JobConf jobConf;
-  private final ProjectionPusher projectionPusher;
-  private List<BlockMetaData> filtedBlocks;
-  private final SerDeStats serDeStats;
 
   public ParquetRecordReaderWrapper(
       final ParquetInputFormat<ArrayWritable> newInputFormat,
@@ -137,27 +113,6 @@ public ParquetRecordReaderWrapper(
     }
   }
 
-  public FilterCompat.Filter setFilter(final JobConf conf, MessageType schema) {
-    SearchArgument sarg = ConvertAstToSearchArg.createFromConf(conf);
-    if (sarg == null) {
-      return null;
-    }
-
-    // Create the Parquet FilterPredicate without including columns that do not exist
-    // on the shema (such as partition columns).
-    FilterPredicate p = ParquetFilterPredicateConverter.toFilterPredicate(sarg, schema);
-    if (p != null) {
-      // Filter may have sensitive information. Do not send to debug.
-      LOG.debug("PARQUET predicate push down generated.");
-      ParquetInputFormat.setFilterPredicate(conf, p);
-      return FilterCompat.get(p);
-    } else {
-      // Filter may have sensitive information. Do not send to debug.
-      LOG.debug("No PARQUET predicate push down is generated.");
-      return null;
-    }
-  }
-
   @Override
   public void close() throws IOException {
     if (realReader != null) {
@@ -227,94 +182,4 @@ public boolean next(final NullWritable key, final ArrayWritable value) throws IO
       throw new IOException(e);
     }
   }
-
-  /**
-   * gets a ParquetInputSplit corresponding to a split given by Hive
-   *
-   * @param oldSplit The split given by Hive
-   * @param conf The JobConf of the Hive job
-   * @return a ParquetInputSplit corresponding to the oldSplit
-   * @throws IOException if the config cannot be enhanced or if the footer cannot be read from the file
-   */
-  @SuppressWarnings("deprecation")
-  protected ParquetInputSplit getSplit(
-      final InputSplit oldSplit,
-      final JobConf conf
-      ) throws IOException {
-    ParquetInputSplit split;
-    if (oldSplit instanceof FileSplit) {
-      final Path finalPath = ((FileSplit) oldSplit).getPath();
-      jobConf = projectionPusher.pushProjectionsAndFilters(conf, finalPath.getParent());
-
-      final ParquetMetadata parquetMetadata = ParquetFileReader.readFooter(jobConf, finalPath);
-      final List<BlockMetaData> blocks = parquetMetadata.getBlocks();
-      final FileMetaData fileMetaData = parquetMetadata.getFileMetaData();
-
-      final ReadContext readContext = new DataWritableReadSupport().init(new InitContext(jobConf,
-          null, fileMetaData.getSchema()));
-
-      // Compute stats
-      for (BlockMetaData bmd : blocks) {
-        serDeStats.setRowCount(serDeStats.getRowCount() + bmd.getRowCount());
-        serDeStats.setRawDataSize(serDeStats.getRawDataSize() + bmd.getTotalByteSize());
-      }
-
-      schemaSize = MessageTypeParser.parseMessageType(readContext.getReadSupportMetadata()
-          .get(DataWritableReadSupport.HIVE_TABLE_AS_PARQUET_SCHEMA)).getFieldCount();
-      final List<BlockMetaData> splitGroup = new ArrayList<BlockMetaData>();
-      final long splitStart = ((FileSplit) oldSplit).getStart();
-      final long splitLength = ((FileSplit) oldSplit).getLength();
-      for (final BlockMetaData block : blocks) {
-        final long firstDataPage = block.getColumns().get(0).getFirstDataPageOffset();
-        if (firstDataPage >= splitStart && firstDataPage < splitStart + splitLength) {
-          splitGroup.add(block);
-        }
-      }
-      if (splitGroup.isEmpty()) {
-        LOG.warn("Skipping split, could not find row group in: " + (FileSplit) oldSplit);
-        return null;
-      }
-
-      FilterCompat.Filter filter = setFilter(jobConf, fileMetaData.getSchema());
-      if (filter != null) {
-        filtedBlocks = RowGroupFilter.filterRowGroups(filter, splitGroup, fileMetaData.getSchema());
-        if (filtedBlocks.isEmpty()) {
-          LOG.debug("All row groups are dropped due to filter predicates");
-          return null;
-        }
-
-        long droppedBlocks = splitGroup.size() - filtedBlocks.size();
-        if (droppedBlocks > 0) {
-          LOG.debug("Dropping " + droppedBlocks + " row groups that do not pass filter predicate");
-        }
-      } else {
-        filtedBlocks = splitGroup;
-      }
-
-      if (HiveConf.getBoolVar(conf, HiveConf.ConfVars.HIVE_PARQUET_TIMESTAMP_SKIP_CONVERSION)) {
-        skipTimestampConversion = !Strings.nullToEmpty(fileMetaData.getCreatedBy()).startsWith("parquet-mr");
-      }
-      split = new ParquetInputSplit(finalPath,
-          splitStart,
-          splitLength,
-          ((FileSplit) oldSplit).getLocations(),
-          filtedBlocks,
-          readContext.getRequestedSchema().toString(),
-          fileMetaData.getSchema().toString(),
-          fileMetaData.getKeyValueMetaData(),
-          readContext.getReadSupportMetadata());
-      return split;
-    } else {
-      throw new IllegalArgumentException("Unknown split type: " + oldSplit);
-    }
-  }
-
-  public List<BlockMetaData> getFiltedBlocks() {
-    return filtedBlocks;
-  }
-
-  @Override
-  public SerDeStats getStats() {
-    return serDeStats;
-  }
 }
diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
index aace48e..3fd75d2 100644
--- ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
+++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/timestamp/NanoTimeUtils.java
@@ -49,7 +49,7 @@ private static Calendar getLocalCalendar() {
     return parquetLocalCalendar.get();
   }
 
-  private static Calendar getCalendar(boolean skipConversion) {
+  public static Calendar getCalendar(boolean skipConversion) {
     Calendar calendar = skipConversion ? getLocalCalendar() : getGMTCalendar();
     calendar.clear(); // Reset all fields before reusing this instance
     return calendar;
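getCalendar(skipConversion) becomes public for the new vectorized timestamp path. It hands back a calendar in the writer-local zone when conversion is skipped (getSplit() above sets skipTimestampConversion only for files not written by parquet-mr) and a GMT calendar otherwise. A small sketch of why that zone choice matters; unless the JVM default zone is GMT, the same instant decodes to different wall-clock fields:

import java.util.Calendar;
import java.util.TimeZone;

public class CalendarChoiceSketch {
  static Calendar getCalendar(boolean skipConversion) {
    Calendar calendar = skipConversion
        ? Calendar.getInstance()                           // writer-local zone
        : Calendar.getInstance(TimeZone.getTimeZone("GMT"));
    calendar.clear(); // reset all fields before reuse, as the Hive method does
    return calendar;
  }

  public static void main(String[] args) {
    long instant = 0L; // 1970-01-01T00:00:00 GMT
    for (boolean skip : new boolean[] {false, true}) {
      Calendar c = getCalendar(skip);
      c.setTimeInMillis(instant);
      System.out.println("skipConversion=" + skip + " -> hour " + c.get(Calendar.HOUR_OF_DAY));
    }
  }
}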
+ */
+package org.apache.hadoop.hive.ql.io.parquet.vector;
+
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.ColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DecimalColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector;
+import org.apache.hadoop.hive.ql.exec.vector.TimestampColumnVector;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTime;
+import org.apache.hadoop.hive.ql.io.parquet.timestamp.NanoTimeUtils;
+import org.apache.hadoop.hive.serde2.typeinfo.PrimitiveTypeInfo;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.parquet.bytes.BytesInput;
+import org.apache.parquet.bytes.BytesUtils;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.Dictionary;
+import org.apache.parquet.column.Encoding;
+import org.apache.parquet.column.page.DataPage;
+import org.apache.parquet.column.page.DataPageV1;
+import org.apache.parquet.column.page.DataPageV2;
+import org.apache.parquet.column.page.DictionaryPage;
+import org.apache.parquet.column.page.PageReader;
+import org.apache.parquet.column.values.ValuesReader;
+import org.apache.parquet.column.values.rle.RunLengthBitPackingHybridDecoder;
+import org.apache.parquet.io.ParquetDecodingException;
+import org.apache.parquet.io.api.Binary;
+import org.apache.parquet.schema.Type;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.nio.ByteOrder;
+import java.sql.Timestamp;
+import java.util.Arrays;
+
+import static org.apache.parquet.column.ValuesType.DEFINITION_LEVEL;
+import static org.apache.parquet.column.ValuesType.REPETITION_LEVEL;
+import static org.apache.parquet.column.ValuesType.VALUES;
+
+/**
+ * A column-level Parquet reader used to read a batch of records for a single column;
+ * parts of the code are adapted from Apache Spark and Apache Parquet.
+ */
+public class VectorizedColumnReader {
+
+  private static final Logger LOG = LoggerFactory.getLogger(VectorizedColumnReader.class);
+
+  private boolean skipTimestampConversion = false;
+
+  /**
+   * Total number of values read.
+   */
+  private long valuesRead;
+
+  /**
+   * Value that indicates the end of the current page. That is,
+   * if valuesRead == endOfPageValueCount, we are at the end of the page.
+   */
+  private long endOfPageValueCount;
+
+  /**
+   * The dictionary, if this column has dictionary encoding.
+   */
+  private final Dictionary dictionary;
+
+  /**
+   * If true, the current page is dictionary encoded.
+   */
+  private boolean isCurrentPageDictionaryEncoded;
+
+  /**
+   * Maximum definition level for this column.
+   */
+  private final int maxDefLevel;
+
+  private int definitionLevel;
+  private int repetitionLevel;
+
+  /**
+   * Repetition/Definition/Value readers.
+   */
+  private IntIterator repetitionLevelColumn;
+  private IntIterator definitionLevelColumn;
+  private ValuesReader dataColumn;
+
+  /**
+   * Total values in the current page.
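+   * Set by initDataReader() each time a new page is read.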
+ */ + private int pageValueCount; + + private final PageReader pageReader; + private final ColumnDescriptor descriptor; + private final Type type; + + public VectorizedColumnReader( + ColumnDescriptor descriptor, + PageReader pageReader, + boolean skipTimestampConversion, + Type type) throws IOException { + this.descriptor = descriptor; + this.type = type; + this.pageReader = pageReader; + this.maxDefLevel = descriptor.getMaxDefinitionLevel(); + this.skipTimestampConversion = skipTimestampConversion; + + DictionaryPage dictionaryPage = pageReader.readDictionaryPage(); + if (dictionaryPage != null) { + try { + this.dictionary = dictionaryPage.getEncoding().initDictionary(descriptor, dictionaryPage); + this.isCurrentPageDictionaryEncoded = true; + } catch (IOException e) { + throw new IOException("could not decode the dictionary for " + descriptor, e); + } + } else { + this.dictionary = null; + this.isCurrentPageDictionaryEncoded = false; + } + } + + void readBatch( + int total, + ColumnVector column, + TypeInfo columnType) throws IOException { + + int rowId = 0; + while (total > 0) { + // Compute the number of values we want to read in this page. + int leftInPage = (int) (endOfPageValueCount - valuesRead); + if (leftInPage == 0) { + readPage(); + leftInPage = (int) (endOfPageValueCount - valuesRead); + } + + int num = Math.min(total, leftInPage); + if (isCurrentPageDictionaryEncoded) { + LongColumnVector dictionaryIds = new LongColumnVector(); + // Read and decode dictionary ids. + readDictionaryIDs(num, dictionaryIds, rowId); + decodeDictionaryIds(rowId, num, column, dictionaryIds); + } else { + // assign values in vector + PrimitiveTypeInfo primitiveColumnType = (PrimitiveTypeInfo) columnType; + switch (primitiveColumnType.getPrimitiveCategory()) { + case INT: + case BYTE: + case SHORT: + readIntegers(num, (LongColumnVector) column, rowId); + break; + case DATE: + case INTERVAL_YEAR_MONTH: + case LONG: + readLongs(num, (LongColumnVector) column, rowId); + break; + case BOOLEAN: + readBooleans(num, (LongColumnVector) column, rowId); + break; + case DOUBLE: + readDoubles(num, (DoubleColumnVector) column, rowId); + break; + case BINARY: + case STRING: + case CHAR: + case VARCHAR: + readBinaries(num, (BytesColumnVector) column, rowId); + break; + case FLOAT: + readFloats(num, (DoubleColumnVector) column, rowId); + break; + case DECIMAL: + readDecimal(num, (DecimalColumnVector) column, rowId); + break; + case INTERVAL_DAY_TIME: + case TIMESTAMP: + default: + throw new IOException( + "Unsupported type category: " + primitiveColumnType.getPrimitiveCategory()); + } + } + rowId += num; + total -= num; + } + } + + private void readDictionaryIDs( + int total, + LongColumnVector c, + int rowId) throws IOException { + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = dataColumn.readValueDictionaryId(); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readIntegers( + int total, + LongColumnVector c, + int rowId) throws IOException { + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = dataColumn.readInteger(); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + 
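+        // a null entry invalidates the repeating and no-nulls flags for the whole batch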
c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readDoubles( + int total, + DoubleColumnVector c, + int rowId) throws IOException { + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = dataColumn.readDouble(); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readBooleans( + int total, + LongColumnVector c, + int rowId) throws IOException { + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = dataColumn.readBoolean() ? 1 : 0; + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readLongs( + int total, + LongColumnVector c, + int rowId) throws IOException { + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = dataColumn.readLong(); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readFloats( + int total, + DoubleColumnVector c, + int rowId) throws IOException { + int left = total; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId] = dataColumn.readFloat(); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readDecimal( + int total, + DecimalColumnVector c, + int rowId) throws IOException { + int left = total; + c.precision = (short) type.asPrimitiveType().getDecimalMetadata().getPrecision(); + c.scale = (short) type.asPrimitiveType().getDecimalMetadata().getScale(); + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + c.vector[rowId].set(dataColumn.readBytes().getBytesUnsafe(), c.scale); + c.isNull[rowId] = false; + c.isRepeating = c.isRepeating && (c.vector[0] == c.vector[rowId]); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + private void readBinaries( + int total, + BytesColumnVector c, + int rowId) throws IOException { + int left = total; + byte[] bytes = null; + while (left > 0) { + readRepetitionAndDefinitionLevels(); + if (definitionLevel >= maxDefLevel) { + Binary binary = dataColumn.readBytes(); + c.setVal(rowId, binary.getBytesUnsafe()); + c.isNull[rowId] = false; + if (bytes == null) { + bytes = ArrayUtils.subarray(c.vector[0], c.start[0], c.length[0]); + } + c.isRepeating = c.isRepeating && Arrays.equals(binary.getBytesUnsafe(), bytes); + } else { + c.isNull[rowId] = true; + c.isRepeating = false; + c.noNulls = false; + } + rowId++; + left--; + } + } + + /** + * Reads `num` values into column, decoding the values from `dictionaryIds` and `dictionary`. 
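+   * For example, an INT96 dictionary entry is a 12-byte binary value holding the
+   * nanos-of-day (8 bytes, little-endian) followed by the Julian day (4 bytes); it is
+   * converted to a java.sql.Timestamp via NanoTimeUtils.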
+ */ + private void decodeDictionaryIds(int rowId, int num, ColumnVector column, + LongColumnVector dictionaryIds) { + System.arraycopy(dictionaryIds.isNull, rowId, column.isNull, rowId, num); + if (column.noNulls) { + column.noNulls = dictionaryIds.noNulls; + } + column.isRepeating = column.isRepeating && dictionaryIds.isRepeating; + + switch (descriptor.getType()) { + case INT32: + for (int i = rowId; i < rowId + num; ++i) { + ((LongColumnVector) column).vector[i] = + dictionary.decodeToInt((int) dictionaryIds.vector[i]); + } + break; + case INT64: + for (int i = rowId; i < rowId + num; ++i) { + ((LongColumnVector) column).vector[i] = + dictionary.decodeToLong((int) dictionaryIds.vector[i]); + } + break; + case FLOAT: + for (int i = rowId; i < rowId + num; ++i) { + ((DoubleColumnVector) column).vector[i] = + dictionary.decodeToFloat((int) dictionaryIds.vector[i]); + } + break; + case DOUBLE: + for (int i = rowId; i < rowId + num; ++i) { + ((DoubleColumnVector) column).vector[i] = + dictionary.decodeToDouble((int) dictionaryIds.vector[i]); + } + break; + case INT96: + for (int i = rowId; i < rowId + num; ++i) { + ByteBuffer buf = dictionary.decodeToBinary((int) dictionaryIds.vector[i]).toByteBuffer(); + buf.order(ByteOrder.LITTLE_ENDIAN); + long timeOfDayNanos = buf.getLong(); + int julianDay = buf.getInt(); + NanoTime nt = new NanoTime(julianDay, timeOfDayNanos); + Timestamp ts = NanoTimeUtils.getTimestamp(nt, skipTimestampConversion); + ((TimestampColumnVector) column).set(i, ts); + } + break; + case BINARY: + case FIXED_LEN_BYTE_ARRAY: + for (int i = rowId; i < rowId + num; ++i) { + ((BytesColumnVector) column) + .setVal(i, dictionary.decodeToBinary((int) dictionaryIds.vector[i]).getBytesUnsafe()); + } + break; + default: + throw new UnsupportedOperationException("Unsupported type: " + descriptor.getType()); + } + } + + private void readRepetitionAndDefinitionLevels() { + repetitionLevel = repetitionLevelColumn.nextInt(); + definitionLevel = definitionLevelColumn.nextInt(); + valuesRead++; + } + + private void readPage() throws IOException { + DataPage page = pageReader.readPage(); + // TODO: Why is this a visitor? 
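+    // DataPage has exactly two concrete subclasses (V1 and V2); the visitor dispatches
+    // to the matching decode path without an instanceof chain.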
+    page.accept(new DataPage.Visitor<Void>() {
+      @Override
+      public Void visit(DataPageV1 dataPageV1) {
+        readPageV1(dataPageV1);
+        return null;
+      }
+
+      @Override
+      public Void visit(DataPageV2 dataPageV2) {
+        readPageV2(dataPageV2);
+        return null;
+      }
+    });
+  }
+
+  private void initDataReader(Encoding dataEncoding, byte[] bytes, int offset, int valueCount) throws IOException {
+    this.pageValueCount = valueCount;
+    this.endOfPageValueCount = valuesRead + pageValueCount;
+    if (dataEncoding.usesDictionary()) {
+      this.dataColumn = null;
+      if (dictionary == null) {
+        throw new IOException(
+          "could not read page in col " + descriptor +
+            " as the dictionary was missing for encoding " + dataEncoding);
+      }
+      dataColumn = dataEncoding.getDictionaryBasedValuesReader(descriptor, VALUES, dictionary);
+      this.isCurrentPageDictionaryEncoded = true;
+    } else {
+      if (dataEncoding != Encoding.PLAIN) {
+        throw new UnsupportedOperationException("Unsupported encoding: " + dataEncoding);
+      }
+      dataColumn = dataEncoding.getValuesReader(descriptor, VALUES);
+      this.isCurrentPageDictionaryEncoded = false;
+    }
+
+    try {
+      dataColumn.initFromPage(pageValueCount, bytes, offset);
+    } catch (IOException e) {
+      throw new IOException("could not read page in col " + descriptor, e);
+    }
+  }
+
+  private void readPageV1(DataPageV1 page) {
+    ValuesReader rlReader = page.getRlEncoding().getValuesReader(descriptor, REPETITION_LEVEL);
+    ValuesReader dlReader = page.getDlEncoding().getValuesReader(descriptor, DEFINITION_LEVEL);
+    this.repetitionLevelColumn = new ValuesReaderIntIterator(rlReader);
+    this.definitionLevelColumn = new ValuesReaderIntIterator(dlReader);
+    try {
+      byte[] bytes = page.getBytes().toByteArray();
+      LOG.debug("page size " + bytes.length + " bytes and " + pageValueCount + " records");
+      LOG.debug("reading repetition levels at 0");
+      rlReader.initFromPage(pageValueCount, bytes, 0);
+      int next = rlReader.getNextOffset();
+      LOG.debug("reading definition levels at " + next);
+      dlReader.initFromPage(pageValueCount, bytes, next);
+      next = dlReader.getNextOffset();
+      LOG.debug("reading data at " + next);
+      initDataReader(page.getValueEncoding(), bytes, next, page.getValueCount());
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
+    }
+  }
+
+  private void readPageV2(DataPageV2 page) {
+    this.pageValueCount = page.getValueCount();
+    this.repetitionLevelColumn = newRLEIterator(descriptor.getMaxRepetitionLevel(),
+      page.getRepetitionLevels());
+    this.definitionLevelColumn = newRLEIterator(descriptor.getMaxDefinitionLevel(), page.getDefinitionLevels());
+    try {
+      LOG.debug("page data size " + page.getData().size() + " bytes and " + pageValueCount + " records");
+      initDataReader(page.getDataEncoding(), page.getData().toByteArray(), 0, page.getValueCount());
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read page " + page + " in col " + descriptor, e);
+    }
+  }
+
+  private IntIterator newRLEIterator(int maxLevel, BytesInput bytes) {
+    try {
+      if (maxLevel == 0) {
+        return new NullIntIterator();
+      }
+      return new RLEIntIterator(
+        new RunLengthBitPackingHybridDecoder(
+          BytesUtils.getWidthFromMaxInt(maxLevel),
+          new ByteArrayInputStream(bytes.toByteArray())));
+    } catch (IOException e) {
+      throw new ParquetDecodingException("could not read levels in page for col " + descriptor, e);
+    }
+  }
+
+  /**
+   * Utility classes to abstract over different ways to read ints with different encodings.
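+   * ValuesReaderIntIterator wraps a plain ValuesReader (page v1 levels),
+   * RLEIntIterator wraps an RLE/bit-packing-hybrid decoder (page v2 levels), and
+   * NullIntIterator is used when the max level is 0 and every level is implicitly 0.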
+ * TODO: remove this layer of abstraction? + */ + abstract static class IntIterator { + abstract int nextInt(); + } + + protected static final class ValuesReaderIntIterator extends IntIterator { + ValuesReader delegate; + + public ValuesReaderIntIterator(ValuesReader delegate) { + this.delegate = delegate; + } + + @Override + int nextInt() { + return delegate.readInteger(); + } + } + + protected static final class RLEIntIterator extends IntIterator { + RunLengthBitPackingHybridDecoder delegate; + + public RLEIntIterator(RunLengthBitPackingHybridDecoder delegate) { + this.delegate = delegate; + } + + @Override + int nextInt() { + try { + return delegate.readInt(); + } catch (IOException e) { + throw new ParquetDecodingException(e); + } + } + } + + protected static final class NullIntIterator extends IntIterator { + @Override + int nextInt() { return 0; } + } +} diff --git ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java new file mode 100644 index 0000000..f94c49a --- /dev/null +++ ql/src/java/org/apache/hadoop/hive/ql/io/parquet/vector/VectorizedParquetRecordReader.java @@ -0,0 +1,289 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+package org.apache.hadoop.hive.ql.io.parquet.vector;
+
+import com.google.common.annotations.VisibleForTesting;
+import org.apache.hadoop.hive.ql.exec.Utilities;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch;
+import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx;
+import org.apache.hadoop.hive.ql.io.IOConstants;
+import org.apache.hadoop.hive.ql.io.parquet.ParquetRecordReaderBase;
+import org.apache.hadoop.hive.ql.io.parquet.ProjectionPusher;
+import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport;
+import org.apache.hadoop.hive.serde2.ColumnProjectionUtils;
+import org.apache.hadoop.hive.serde2.SerDeStats;
+import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
+import org.apache.hadoop.io.NullWritable;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.RecordReader;
+import org.apache.hadoop.mapreduce.InputSplit;
+import org.apache.parquet.column.ColumnDescriptor;
+import org.apache.parquet.column.page.PageReadStore;
+import org.apache.parquet.filter2.compat.FilterCompat;
+import org.apache.parquet.hadoop.ParquetFileReader;
+import org.apache.parquet.hadoop.ParquetInputSplit;
+import org.apache.parquet.hadoop.metadata.BlockMetaData;
+import org.apache.parquet.hadoop.metadata.ParquetMetadata;
+import org.apache.parquet.schema.MessageType;
+import org.apache.parquet.schema.Type;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+import static org.apache.parquet.filter2.compat.RowGroupFilter.filterRowGroups;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.NO_FILTER;
+import static org.apache.parquet.format.converter.ParquetMetadataConverter.range;
+import static org.apache.parquet.hadoop.ParquetFileReader.readFooter;
+import static org.apache.parquet.hadoop.ParquetInputFormat.getFilter;
+
+/**
+ * This reader is used to read a batch of records from an InputSplit; parts of the code are
+ * adapted from Apache Spark and Apache Parquet.
+ */
+public class VectorizedParquetRecordReader extends ParquetRecordReaderBase
+  implements RecordReader<NullWritable, VectorizedRowBatch> {
+  public static final Logger LOG = LoggerFactory.getLogger(VectorizedParquetRecordReader.class);
+
+  private List<Integer> colsToInclude;
+
+  protected MessageType fileSchema;
+  protected MessageType requestedSchema;
+  private List<String> columnNamesList;
+  private List<TypeInfo> columnTypesList;
+  private VectorizedRowBatchCtx rbCtx;
+
+  /**
+   * For each requested column, the reader used to read that column. An entry is null if the
+   * column is missing from the file, in which case the attribute is populated with NULL.
+   */
+  private VectorizedColumnReader[] columnReaders;
+
+  /**
+   * The number of rows that have been returned.
+   */
+  private long rowsReturned;
+
+  /**
+   * The number of rows that have been loaded so far, including the current in-flight row group.
+   */
+  private long totalCountLoadedSoFar = 0;
+
+  /**
+   * The total number of rows this RecordReader will eventually read. The sum of the
+   * rows of all the row groups.
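+   * Accumulated in initialize() from the row counts of the selected row groups.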
+ */ + protected long totalRowCount; + + @VisibleForTesting + public VectorizedParquetRecordReader( + InputSplit inputSplit, + JobConf conf) { + try { + serDeStats = new SerDeStats(); + projectionPusher = new ProjectionPusher(); + initialize(inputSplit, conf); + colsToInclude = ColumnProjectionUtils.getReadColumnIDs(conf); + rbCtx = Utilities.getVectorizedRowBatchCtx(conf); + } catch (Throwable e) { + LOG.error("Failed to create the vectorized reader due to exception " + e); + throw new RuntimeException(e); + } + } + + public VectorizedParquetRecordReader( + org.apache.hadoop.mapred.InputSplit oldInputSplit, + JobConf conf) { + try { + serDeStats = new SerDeStats(); + projectionPusher = new ProjectionPusher(); + initialize(getSplit(oldInputSplit, conf), conf); + colsToInclude = ColumnProjectionUtils.getReadColumnIDs(conf); + rbCtx = Utilities.getVectorizedRowBatchCtx(conf); + } catch (Throwable e) { + LOG.error("Failed to create the vectorized reader due to exception " + e); + throw new RuntimeException(e); + } + } + + public void initialize( + InputSplit oldSplit, + JobConf configuration) throws IOException, InterruptedException { + jobConf = configuration; + ParquetMetadata footer; + List blocks; + ParquetInputSplit split = (ParquetInputSplit) oldSplit; + boolean indexAccess = + configuration.getBoolean(DataWritableReadSupport.PARQUET_COLUMN_INDEX_ACCESS, false); + this.file = split.getPath(); + long[] rowGroupOffsets = split.getRowGroupOffsets(); + + String columnNames = configuration.get(IOConstants.COLUMNS); + columnNamesList = DataWritableReadSupport.getColumnNames(columnNames); + String columnTypes = configuration.get(IOConstants.COLUMNS_TYPES); + columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes); + + // if task.side.metadata is set, rowGroupOffsets is null + if (rowGroupOffsets == null) { + //TODO check whether rowGroupOffSets can be null + // then we need to apply the predicate push down filter + footer = readFooter(configuration, file, range(split.getStart(), split.getEnd())); + MessageType fileSchema = footer.getFileMetaData().getSchema(); + FilterCompat.Filter filter = getFilter(configuration); + blocks = filterRowGroups(filter, footer.getBlocks(), fileSchema); + } else { + // otherwise we find the row groups that were selected on the client + footer = readFooter(configuration, file, NO_FILTER); + Set offsets = new HashSet<>(); + for (long offset : rowGroupOffsets) { + offsets.add(offset); + } + blocks = new ArrayList<>(); + for (BlockMetaData block : footer.getBlocks()) { + if (offsets.contains(block.getStartingPos())) { + blocks.add(block); + } + } + // verify we found them all + if (blocks.size() != rowGroupOffsets.length) { + long[] foundRowGroupOffsets = new long[footer.getBlocks().size()]; + for (int i = 0; i < foundRowGroupOffsets.length; i++) { + foundRowGroupOffsets[i] = footer.getBlocks().get(i).getStartingPos(); + } + // this should never happen. + // provide a good error message in case there's a bug + throw new IllegalStateException( + "All the offsets listed in the split should be found in the file." 
+ + " expected: " + Arrays.toString(rowGroupOffsets) + + " found: " + blocks + + " out of: " + Arrays.toString(foundRowGroupOffsets) + + " in range " + split.getStart() + ", " + split.getEnd()); + } + } + + for (BlockMetaData block : blocks) { + this.totalRowCount += block.getRowCount(); + } + this.fileSchema = footer.getFileMetaData().getSchema(); + + MessageType tableSchema; + if (indexAccess) { + List indexSequence = new ArrayList<>(); + + // Generates a sequence list of indexes + for(int i = 0; i < columnNamesList.size(); i++) { + indexSequence.add(i); + } + + tableSchema = DataWritableReadSupport.getSchemaByIndex(fileSchema, columnNamesList, + indexSequence); + } else { + tableSchema = DataWritableReadSupport.getSchemaByName(fileSchema, columnNamesList, + columnTypesList); + } + + List indexColumnsWanted = ColumnProjectionUtils.getReadColumnIDs(configuration); + if (!ColumnProjectionUtils.isReadAllColumns(configuration) && !indexColumnsWanted.isEmpty()) { + requestedSchema = + DataWritableReadSupport.getSchemaByIndex(tableSchema, columnNamesList, indexColumnsWanted); + } else { + requestedSchema = fileSchema; + } + + this.reader = new ParquetFileReader( + configuration, footer.getFileMetaData(), file, blocks, requestedSchema.getColumns()); + } + + @Override + public boolean next( + NullWritable nullWritable, + VectorizedRowBatch vectorizedRowBatch) throws IOException { + return nextBatch(vectorizedRowBatch); + } + + @Override + public NullWritable createKey() { + return NullWritable.get(); + } + + @Override + public VectorizedRowBatch createValue() { + return rbCtx.createVectorizedRowBatch(); + } + + @Override + public long getPos() throws IOException { + //TODO + return 0; + } + + @Override + public void close() throws IOException { + } + + @Override + public float getProgress() throws IOException { + //TODO + return 0; + } + + /** + * Advances to the next batch of rows. Returns false if there are no more. + */ + private boolean nextBatch(VectorizedRowBatch columnarBatch) throws IOException { + columnarBatch.reset(); + if (rowsReturned >= totalRowCount) { + return false; + } + checkEndOfRowGroup(); + + int num = (int) Math.min(VectorizedRowBatch.DEFAULT_SIZE, totalCountLoadedSoFar - rowsReturned); + for (int i = 0; i < columnReaders.length; ++i) { + if (columnReaders[i] == null) { + continue; + } + columnarBatch.cols[colsToInclude.get(i)].isRepeating = true; + columnReaders[i].readBatch(num, columnarBatch.cols[colsToInclude.get(i)], + columnTypesList.get(colsToInclude.get(i))); + } + rowsReturned += num; + columnarBatch.size = num; + return true; + } + + private void checkEndOfRowGroup() throws IOException { + if (rowsReturned != totalCountLoadedSoFar) { + return; + } + PageReadStore pages = reader.readNextRowGroup(); + if (pages == null) { + throw new IOException("expecting more rows but reached last block. 
Read " + + rowsReturned + " out of " + totalRowCount); + } + List columns = requestedSchema.getColumns(); + List types = requestedSchema.getFields(); + columnReaders = new VectorizedColumnReader[columns.size()]; + for (int i = 0; i < columns.size(); ++i) { + columnReaders[i] = + new VectorizedColumnReader(columns.get(i), pages.getPageReader(columns.get(i)), + skipTimestampConversion, types.get(i)); + } + totalCountLoadedSoFar += pages.getRowCount(); + } +} diff --git ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java new file mode 100644 index 0000000..276ff19 --- /dev/null +++ ql/src/test/org/apache/hadoop/hive/ql/io/parquet/TestVectorizedColumnReader.java @@ -0,0 +1,429 @@ +/** + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hadoop.hive.ql.io.parquet; + +import org.apache.commons.lang.ArrayUtils; +import org.apache.hadoop.conf.Configuration; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.ql.exec.Utilities; +import org.apache.hadoop.hive.ql.exec.vector.BytesColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.DoubleColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.LongColumnVector; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatch; +import org.apache.hadoop.hive.ql.exec.vector.VectorizedRowBatchCtx; +import org.apache.hadoop.hive.ql.io.IOConstants; +import org.apache.hadoop.hive.ql.io.parquet.read.DataWritableReadSupport; +import org.apache.hadoop.hive.ql.io.parquet.serde.ArrayWritableObjectInspector; +import org.apache.hadoop.hive.ql.io.parquet.vector.VectorizedParquetRecordReader; +import org.apache.hadoop.hive.ql.metadata.HiveException; +import org.apache.hadoop.hive.ql.plan.MapWork; +import org.apache.hadoop.hive.serde2.ColumnProjectionUtils; +import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector; +import org.apache.hadoop.hive.serde2.typeinfo.StructTypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo; +import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoFactory; +import org.apache.hadoop.io.NullWritable; +import org.apache.hadoop.mapreduce.InputSplit; +import org.apache.hadoop.mapred.JobConf; +import org.apache.hadoop.mapreduce.Job; +import org.apache.parquet.example.data.Group; +import org.apache.parquet.example.data.simple.SimpleGroupFactory; +import org.apache.parquet.hadoop.ParquetInputFormat; +import org.apache.parquet.hadoop.ParquetWriter; +import org.apache.parquet.hadoop.example.GroupReadSupport; +import org.apache.parquet.hadoop.example.GroupWriteSupport; +import org.apache.parquet.io.api.Binary; +import org.apache.parquet.schema.MessageType; +import org.junit.AfterClass; +import org.junit.BeforeClass; +import org.junit.Test; + +import java.io.IOException; +import java.util.List; +import java.util.Random; + +import static junit.framework.Assert.assertTrue; +import 
static junit.framework.TestCase.assertFalse; +import static org.apache.parquet.column.ParquetProperties.WriterVersion.PARQUET_1_0; +import static org.apache.parquet.hadoop.api.ReadSupport.PARQUET_READ_SCHEMA; +import static org.apache.parquet.hadoop.metadata.CompressionCodecName.GZIP; +import static org.apache.parquet.schema.MessageTypeParser.parseMessageType; +import static org.junit.Assert.assertEquals; + +public class TestVectorizedColumnReader { + + private static final int nElements = 2500; + protected static final Configuration conf = new Configuration(); + protected static final Path file = + new Path("target/test/TestParquetVectorReader/testParquetFile"); + private static String[] uniqueStrs = new String[nElements]; + private static boolean[] isNulls = new boolean[nElements]; + private static Random random = new Random(); + protected static final MessageType schema = parseMessageType( + "message test { " + + "required int32 int32_field; " + + "required int64 int64_field; " + + "required int96 int96_field; " + + "required double double_field; " + + "required float float_field; " + + "required boolean boolean_field; " + + "required fixed_len_byte_array(3) flba_field; " + + "optional fixed_len_byte_array(1) some_null_field; " + + "optional fixed_len_byte_array(1) all_null_field; " + + "optional binary binary_field; " + + "optional binary binary_field_non_repeating; " + + "} "); + + @AfterClass + public static void cleanup() throws IOException { + FileSystem fs = file.getFileSystem(conf); + if (fs.exists(file)) { + fs.delete(file, true); + } + } + + @BeforeClass + public static void prepareFile() throws IOException { + cleanup(); + + boolean dictionaryEnabled = true; + boolean validating = false; + GroupWriteSupport.setSchema(schema, conf); + SimpleGroupFactory f = new SimpleGroupFactory(schema); + ParquetWriter writer = new ParquetWriter( + file, + new GroupWriteSupport(), + GZIP, 1024*1024, 1024, 1024*1024, + dictionaryEnabled, validating, PARQUET_1_0, conf); + writeData(f, writer); + } + + protected static void writeData(SimpleGroupFactory f, ParquetWriter writer) throws IOException { + initialStrings(uniqueStrs); + for (int i = 0; i < nElements; i++) { + Group group = f.newGroup() + .append("int32_field", i) + .append("int64_field", (long) 2 * i) + .append("int96_field", Binary.fromReusedByteArray("999999999999".getBytes())) + .append("double_field", i * 1.0) + .append("float_field", ((float) (i * 2.0))) + .append("boolean_field", i % 5 == 0) + .append("flba_field", "abc"); + + if (i % 2 == 1) { + group.append("some_null_field", "x"); + } + + if (i % 13 != 1) { + int binaryLen = i % 10; + group.append("binary_field", + Binary.fromString(new String(new char[binaryLen]).replace("\0", "x"))); + } + + if (uniqueStrs[i] != null) { + group.append("binary_field_non_repeating", Binary.fromString(uniqueStrs[i])); + } + writer.write(group); + } + writer.close(); + } + + private static String getRandomStr() { + int len = random.nextInt(10); + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < len; i++) { + sb.append((char) ('a' + random.nextInt(25))); + } + return sb.toString(); + } + + public static void initialStrings(String[] uniqueStrs) { + for (int i = 0; i < uniqueStrs.length; i++) { + String str = getRandomStr(); + if (!str.isEmpty()) { + uniqueStrs[i] = str; + isNulls[i] = false; + }else{ + isNulls[i] = true; + } + } + } + + private VectorizedParquetRecordReader createParquetReader(String schemaString, Configuration conf) + throws IOException, InterruptedException, 
HiveException { + conf.set(PARQUET_READ_SCHEMA, schemaString); + HiveConf.setBoolVar(conf, HiveConf.ConfVars.HIVE_VECTORIZATION_ENABLED, true); + HiveConf.setVar(conf, HiveConf.ConfVars.PLAN, "//tmp"); + + Job vectorJob = new Job(conf, "read vector"); + ParquetInputFormat.setInputPaths(vectorJob, file); + ParquetInputFormat parquetInputFormat = new ParquetInputFormat(GroupReadSupport.class); + InputSplit split = (InputSplit) parquetInputFormat.getSplits(vectorJob).get(0); + initialVectorizedRowBatchCtx(conf); + return new VectorizedParquetRecordReader(split, new JobConf(conf)); + } + + private void initialVectorizedRowBatchCtx(Configuration conf) throws HiveException { + MapWork mapWork = new MapWork(); + VectorizedRowBatchCtx rbCtx = new VectorizedRowBatchCtx(); + rbCtx.init(createStructObjectInspector(conf), new String[0]); + mapWork.setVectorMode(true); + mapWork.setVectorizedRowBatchCtx(rbCtx); + Utilities.setMapWork(conf, mapWork); + } + + private StructObjectInspector createStructObjectInspector(Configuration conf) { + // Create row related objects + String columnNames = conf.get(IOConstants.COLUMNS); + List columnNamesList = DataWritableReadSupport.getColumnNames(columnNames); + String columnTypes = conf.get(IOConstants.COLUMNS_TYPES); + List columnTypesList = DataWritableReadSupport.getColumnTypes(columnTypes); + TypeInfo rowTypeInfo = TypeInfoFactory.getStructTypeInfo(columnNamesList, columnTypesList); + return new ArrayWritableObjectInspector((StructTypeInfo) rowTypeInfo); + } + + @Test + public void testIntRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"int32_field"); + conf.set(IOConstants.COLUMNS_TYPES,"int"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required int32 int32_field;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + long c = 0; + while (reader.next(NullWritable.get(), previous)) { + LongColumnVector vector = (LongColumnVector) previous.cols[0]; + assertTrue(vector.noNulls); + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + assertEquals(c, vector.vector[i]); + assertFalse(vector.isNull[i]); + c++; + } + } + assertEquals(nElements, c); + } finally { + reader.close(); + } + } + + @Test + public void testLongRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"int64_field"); + conf.set(IOConstants.COLUMNS_TYPES, "bigint"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required int64 int64_field;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + long c = 0; + while (reader.next(NullWritable.get(), previous)) { + LongColumnVector vector = (LongColumnVector) previous.cols[0]; + assertTrue(vector.noNulls); + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + assertEquals(2 * c, vector.vector[i]); + assertFalse(vector.isNull[i]); + c++; + } + } + assertEquals(nElements, c); + } finally { + reader.close(); + } + } + + @Test + public void testDoubleRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"double_field"); + conf.set(IOConstants.COLUMNS_TYPES, "double"); + 
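+    // project only column 0 so the reader materializes a single DoubleColumnVector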
conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required double double_field;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + long c = 0; + while (reader.next(NullWritable.get(), previous)) { + DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0]; + assertTrue(vector.noNulls); + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + assertEquals(1.0 * c, vector.vector[i], 0); + assertFalse(vector.isNull[i]); + c++; + } + } + assertEquals(nElements, c); + } finally { + reader.close(); + } + } + + @Test + public void testFloatRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"float_field"); + conf.set(IOConstants.COLUMNS_TYPES, "float"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required float float_field;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + long c = 0; + while (reader.next(NullWritable.get(), previous)) { + DoubleColumnVector vector = (DoubleColumnVector) previous.cols[0]; + assertTrue(vector.noNulls); + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + assertEquals((float)2.0 * c, vector.vector[i], 0); + assertFalse(vector.isNull[i]); + c++; + } + } + assertEquals(nElements, c); + } finally { + reader.close(); + } + } + + @Test + public void testBooleanRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"boolean_field"); + conf.set(IOConstants.COLUMNS_TYPES, "boolean"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required boolean boolean_field;}", conf); + VectorizedRowBatch previous = reader.createValue(); + try { + long c = 0; + while (reader.next(NullWritable.get(), previous)) { + LongColumnVector vector = (LongColumnVector) previous.cols[0]; + assertTrue(vector.noNulls); + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + int e = (c % 5 == 0) ? 
1 : 0; + assertEquals(e, vector.vector[i]); + assertFalse(vector.isNull[i]); + c++; + } + } + assertEquals(nElements, c); + } finally { + reader.close(); + } + } + + @Test + public void testBinaryReadDictionaryEncoding() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"binary_field"); + conf.set(IOConstants.COLUMNS_TYPES, "string"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required binary binary_field;}", conf); + VectorizedRowBatch previous = reader.createValue(); + int c = 0; + try { + while (reader.next(NullWritable.get(), previous)) { + BytesColumnVector vector = (BytesColumnVector) previous.cols[0]; + boolean noNull = true; + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + if (c % 13 == 1) { + assertTrue(vector.isNull[i]); + } else { + assertFalse(vector.isNull[i]); + int binaryLen = c % 10; + String expected = new String(new char[binaryLen]).replace("\0", "x"); + String actual = new String(ArrayUtils + .subarray(vector.vector[i], vector.start[i], vector.start[i] + vector.length[i])); + assertEquals("Failed at " + c, expected, actual); + noNull = false; + } + c++; + } + assertEquals("No Null check failed at " + c, noNull, vector.noNulls); + assertFalse(vector.isRepeating); + } + assertEquals(nElements, c); + } finally { + reader.close(); + } + } + + @Test + public void testBinaryRead() throws Exception { + Configuration conf = new Configuration(); + conf.set(IOConstants.COLUMNS,"binary_field_non_repeating"); + conf.set(IOConstants.COLUMNS_TYPES, "string"); + conf.setBoolean(ColumnProjectionUtils.READ_ALL_COLUMNS, false); + conf.set(ColumnProjectionUtils.READ_COLUMN_IDS_CONF_STR, "0"); + VectorizedParquetRecordReader reader = + createParquetReader("message test { required binary binary_field_non_repeating;}", conf); + VectorizedRowBatch previous = reader.createValue(); + int c = 0; + try { + while (reader.next(NullWritable.get(), previous)) { + BytesColumnVector vector = (BytesColumnVector) previous.cols[0]; + boolean noNull = true; + for (int i = 0; i < vector.vector.length; i++) { + if(c == nElements){ + break; + } + String actual; + assertEquals("Null assert failed at " + c, isNulls[c], vector.isNull[i]); + if (!vector.isNull[i]) { + actual = new String(ArrayUtils + .subarray(vector.vector[i], vector.start[i], vector.start[i] + vector.length[i])); + assertEquals("failed at " + c, uniqueStrs[c], actual); + }else{ + noNull = false; + } + c++; + } + assertEquals("No Null check failed at " + c, noNull, vector.noNulls); + assertFalse(vector.isRepeating); + } + assertEquals("It doesn't exit at expected position", nElements, c); + } finally { + reader.close(); + } + } +} diff --git ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q new file mode 100644 index 0000000..7de444f --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q @@ -0,0 +1,94 @@ +set hive.mapred.mode=nonstrict; +DROP TABLE parquet_types_staging; +DROP TABLE parquet_types; + +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.use.row.serde.deserialize=true; +set hive.vectorized.use.vector.serde.deserialize=true; +set 
hive.vectorized.execution.reduce.groupby.enabled = true; + +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':'; + +CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + m1 map, + l1 array, + st1 struct, + d date +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_non_dictionary_types.txt' OVERWRITE INTO TABLE +parquet_types_staging; + +SELECT * FROM parquet_types_staging; + +INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging; + +-- test types in group by + +EXPLAIN SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +; + +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +; + +EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat; +SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat; + +EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar; +SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar; + +EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar; +SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar; + +EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1; +SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1; + +EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary; +SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary; \ No newline at end of file diff --git ql/src/test/queries/clientpositive/parquet_types_vectorization.q ql/src/test/queries/clientpositive/parquet_types_vectorization.q new file mode 100644 index 0000000..bb0e5b2 --- /dev/null +++ ql/src/test/queries/clientpositive/parquet_types_vectorization.q @@ -0,0 +1,96 @@ +set hive.mapred.mode=nonstrict; +DROP TABLE parquet_types_staging; +DROP TABLE parquet_types; + +set hive.vectorized.execution.enabled=true; +set hive.vectorized.execution.reduce.enabled=true; +set hive.vectorized.use.row.serde.deserialize=true; +set hive.vectorized.use.vector.serde.deserialize=true; +set hive.vectorized.execution.reduce.groupby.enabled = true; + +CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':'; + +CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + 
cbinary binary, + m1 map, + l1 array, + st1 struct, + d date +) STORED AS PARQUET; + +LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging; + +SELECT * FROM parquet_types_staging; + +INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging; + +-- test types in group by + +EXPLAIN SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +; + +SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +; + +EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat; +SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat; + +EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar; +SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar; + +EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar; +SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar; + +EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1; +SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1; + +EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t; +SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t; + +EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary; +SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary; \ No newline at end of file diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out index 8345132..e42453d 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet.q.out @@ -150,7 +150,7 @@ STAGE PLANS: Map-reduce partition columns: _col0 (type: tinyint) Statistics: Num rows: 12288 Data size: 73728 Basic stats: COMPLETE Column stats: NONE value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct), _col5 (type: struct) - Execution mode: llap + Execution mode: vectorized, llap LLAP IO: no inputs Reducer 2 Execution mode: llap diff --git ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out index b49d5dd..0524cb3 100644 --- ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out +++ ql/src/test/results/clientpositive/llap/vectorized_parquet_types.q.out @@ -250,19 +250,19 @@ Stage-0 limit:-1 Stage-1 Reducer 3 vectorized, llap - File Output Operator [FS_10] - Select Operator [SEL_9] (rows=11 width=11) + File Output Operator [FS_12] + Select Operator [SEL_11] (rows=11 width=11) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"] <-Reducer 2 [SIMPLE_EDGE] llap SHUFFLE [RS_6] Group By Operator [GBY_4] (rows=11 width=11) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["max(VALUE._col0)","min(VALUE._col1)","count(VALUE._col2)","avg(VALUE._col3)","stddev_pop(VALUE._col4)","max(VALUE._col5)"],keys:KEY._col0 - <-Map 1 [SIMPLE_EDGE] llap + <-Map 1 [SIMPLE_EDGE] vectorized, llap SHUFFLE [RS_3] 
PartitionCols:_col0 - Group By Operator [GBY_2] (rows=22 width=11) + Group By Operator [GBY_10] (rows=22 width=11) Output:["_col0","_col1","_col2","_col3","_col4","_col5","_col6"],aggregations:["max(cint)","min(csmallint)","count(cstring1)","avg(cfloat)","stddev_pop(cdouble)","max(cdecimal)"],keys:ctinyint - Select Operator [SEL_1] (rows=22 width=11) + Select Operator [SEL_9] (rows=22 width=11) Output:["ctinyint","cint","csmallint","cstring1","cfloat","cdouble","cdecimal"] TableScan [TS_0] (rows=22 width=11) default@parquet_types,parquet_types,Tbl:COMPLETE,Col:NONE,Output:["cint","ctinyint","csmallint","cfloat","cdouble","cstring1","cdecimal"] diff --git ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out new file mode 100644 index 0000000..ce32df1 --- /dev/null +++ ql/src/test/results/clientpositive/parquet_types_non_dictionary_encoding_vectorization.q.out @@ -0,0 +1,2452 @@ +PREHOOK: query: DROP TABLE parquet_types_staging +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types_staging +POSTHOOK: type: DROPTABLE +PREHOOK: query: DROP TABLE parquet_types +PREHOOK: type: DROPTABLE +POSTHOOK: query: DROP TABLE parquet_types +POSTHOOK: type: DROPTABLE +PREHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: CREATE TABLE parquet_types_staging ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary string, + m1 map, + l1 array, + st1 struct, + d date +) ROW FORMAT DELIMITED +FIELDS TERMINATED BY '|' +COLLECTION ITEMS TERMINATED BY ',' +MAP KEYS TERMINATED BY ':' +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + m1 map, + l1 array, + st1 struct, + d date +) STORED AS PARQUET +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@parquet_types +POSTHOOK: query: CREATE TABLE parquet_types ( + cint int, + ctinyint tinyint, + csmallint smallint, + cfloat float, + cdouble double, + cstring1 string, + t timestamp, + cchar char(5), + cvarchar varchar(10), + cbinary binary, + m1 map, + l1 array, + st1 struct, + d date +) STORED AS PARQUET +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@parquet_types +PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_non_dictionary_types.txt' OVERWRITE INTO TABLE +parquet_types_staging +PREHOOK: type: LOAD +#### A masked pattern was here #### +PREHOOK: Output: default@parquet_types_staging +POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_non_dictionary_types.txt' OVERWRITE INTO TABLE +parquet_types_staging +POSTHOOK: type: LOAD +#### A masked pattern was 
here #### +POSTHOOK: Output: default@parquet_types_staging +PREHOOK: query: SELECT * FROM parquet_types_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +#### A masked pattern was here #### +POSTHOOK: query: SELECT * FROM parquet_types_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +#### A masked pattern was here #### +1000 -128 0 0.0 0.0 0.3 NULL 1940- {} [null] {"c1":1000,"c2":"1"} NULL +1001 -127 1 3000.0 0.3 1.3 NULL 1941- b b {"b":null} [null] {"c1":1001,"c2":"1"} NULL +1002 -126 2 6000.0 0.6 2.3 NULL 1942- c c {"c":null} [null] {"c1":1002,"c2":"1"} NULL +1003 -125 3 9000.0 0.9 3.3 NULL 1943- d d {"d":null} [null] {"c1":1003,"c2":"1"} NULL +1004 -124 4 12000.0 1.2 4.3 NULL 1944- e e {"e":null} [null] {"c1":1004,"c2":"1"} NULL +1005 -123 5 15000.0 1.5 5.3 NULL 1945- f f {"f":null} [null] {"c1":1005,"c2":"1"} NULL +1006 -122 6 18000.0 1.8 6.3 NULL 1946- g g {"g":null} [null] {"c1":1006,"c2":"1"} NULL +1007 -121 7 21000.0 2.1 7.3 NULL 1947- h h {"h":null} [null] {"c1":1007,"c2":"1"} NULL +1008 -120 8 24000.0 2.4 8.3 NULL 1948- i i {"i":null} [null] {"c1":1008,"c2":"1"} NULL +1009 -119 9 27000.0 2.7 9.3 NULL 1949- j j {"j":null} [null] {"c1":1009,"c2":"1"} NULL +1010 -118 10 30000.0 3.0 10.3 NULL 1950- k k {"k":null} [null] {"c1":1010,"c2":"1"} NULL +1011 -117 11 33000.0 3.3 11.3 NULL 1951- l l {"l":null} [null] {"c1":1011,"c2":"1"} NULL +1012 -116 12 36000.0 3.6 12.3 NULL 1952- m m {"m":null} [null] {"c1":1012,"c2":"1"} NULL +1013 -115 13 39000.0 3.9 13.3 NULL 1953- n n {"n":null} [null] {"c1":1013,"c2":"1"} NULL +1014 -114 14 42000.0 4.2 14.3 NULL 1954- o o {"o":null} [null] {"c1":1014,"c2":"1"} NULL +1015 -113 15 45000.0 4.5 15.3 NULL 1955- p p {"p":null} [null] {"c1":1015,"c2":"1"} NULL +1016 -112 16 48000.0 4.8 16.3 NULL 1956- q q {"q":null} [null] {"c1":1016,"c2":"1"} NULL +1017 -111 17 51000.0 5.1 17.3 NULL 1957- r r {"r":null} [null] {"c1":1017,"c2":"1"} NULL +1018 -110 18 54000.0 5.4 18.3 NULL 1958- s s {"s":null} [null] {"c1":1018,"c2":"1"} NULL +1019 -109 19 57000.0 5.7 19.3 NULL 1959- t t {"t":null} [null] {"c1":1019,"c2":"1"} NULL +1020 -108 20 60000.0 6.0 20.3 NULL 1960- u u {"u":null} [null] {"c1":1020,"c2":"1"} NULL +1021 -107 21 63000.0 6.3 21.3 NULL 1961- v v {"v":null} [null] {"c1":1021,"c2":"1"} NULL +1022 -106 22 66000.0 6.6 22.3 NULL 1962- w w {"w":null} [null] {"c1":1022,"c2":"1"} NULL +1023 -105 23 69000.0 6.9 23.3 NULL 1963- x x {"x":null} [null] {"c1":1023,"c2":"1"} NULL +1024 -104 24 72000.0 7.2 24.3 NULL 1964- y y {"y":null} [null] {"c1":1024,"c2":"1"} NULL +1025 -103 25 75000.0 7.5 25.3 NULL 1965- z z {"z":null} [null] {"c1":1025,"c2":"1"} NULL +1026 -102 26 78000.0 7.8 26.3 NULL 1966- ba ba {"ba":null} [null] {"c1":1026,"c2":"1"} NULL +1027 -101 27 81000.0 8.1 27.3 NULL 1967- bb bb {"bb":null} [null] {"c1":1027,"c2":"1"} NULL +1028 -100 28 84000.0 8.4 28.3 NULL 1968- bc bc {"bc":null} [null] {"c1":1028,"c2":"1"} NULL +1029 -99 29 87000.0 8.7 29.3 NULL 1969- bd bd {"bd":null} [null] {"c1":1029,"c2":"1"} NULL +1030 -98 30 90000.0 9.0 30.3 NULL 1970- be be {"be":null} [null] {"c1":1030,"c2":"1"} NULL +1031 -97 31 93000.0 9.3 31.3 NULL 1971- bf bf {"bf":null} [null] {"c1":1031,"c2":"1"} NULL +1032 -96 32 96000.0 9.6 32.3 NULL 1972- bg bg {"bg":null} [null] {"c1":1032,"c2":"1"} NULL +1033 -95 33 99000.0 9.9 33.3 NULL 1973- bh bh {"bh":null} [null] {"c1":1033,"c2":"1"} NULL +1034 -94 34 102000.0 10.2 34.3 NULL 1974- bi bi {"bi":null} [null] {"c1":1034,"c2":"1"} NULL +1035 -93 35 105000.0 10.5 35.3 NULL 
1975- bj bj {"bj":null} [null] {"c1":1035,"c2":"1"} NULL +1036 -92 36 108000.0 10.8 36.3 NULL 1976- bk bk {"bk":null} [null] {"c1":1036,"c2":"1"} NULL +1037 -91 37 111000.0 11.1 37.3 NULL 1977- bl bl {"bl":null} [null] {"c1":1037,"c2":"1"} NULL +1038 -90 38 114000.0 11.4 38.3 NULL 1978- bm bm {"bm":null} [null] {"c1":1038,"c2":"1"} NULL +1039 -89 39 117000.0 11.7 39.3 NULL 1979- bn bn {"bn":null} [null] {"c1":1039,"c2":"1"} NULL +1040 -88 40 120000.0 12.0 40.3 NULL 1980- bo bo {"bo":null} [null] {"c1":1040,"c2":"1"} NULL +1041 -87 41 123000.0 12.3 41.3 NULL 1981- bp bp {"bp":null} [null] {"c1":1041,"c2":"1"} NULL +1042 -86 42 126000.0 12.6 42.3 NULL 1982- bq bq {"bq":null} [null] {"c1":1042,"c2":"1"} NULL +1043 -85 43 129000.0 12.9 43.3 NULL 1983- br br {"br":null} [null] {"c1":1043,"c2":"1"} NULL +1044 -84 44 132000.0 13.2 44.3 NULL 1984- bs bs {"bs":null} [null] {"c1":1044,"c2":"1"} NULL +1045 -83 45 135000.0 13.5 45.3 NULL 1985- bt bt {"bt":null} [null] {"c1":1045,"c2":"1"} NULL +1046 -82 46 138000.0 13.8 46.3 NULL 1986- bu bu {"bu":null} [null] {"c1":1046,"c2":"1"} NULL +1047 -81 47 141000.0 14.1 47.3 NULL 1987- bv bv {"bv":null} [null] {"c1":1047,"c2":"1"} NULL +1048 -80 48 144000.0 14.4 48.3 NULL 1988- bw bw {"bw":null} [null] {"c1":1048,"c2":"1"} NULL +1049 -79 49 147000.0 14.7 49.3 NULL 1989- bx bx {"bx":null} [null] {"c1":1049,"c2":"1"} NULL +1050 -78 50 150000.0 15.0 50.3 NULL 1990- by by {"by":null} [null] {"c1":1050,"c2":"1"} NULL +1051 -77 51 153000.0 15.3 51.3 NULL 1991- bz bz {"bz":null} [null] {"c1":1051,"c2":"1"} NULL +1052 -76 52 156000.0 15.6 52.3 NULL 1992- ca ca {"ca":null} [null] {"c1":1052,"c2":"1"} NULL +1053 -75 53 159000.0 15.9 53.3 NULL 1993- cb cb {"cb":null} [null] {"c1":1053,"c2":"1"} NULL +1054 -74 54 162000.0 16.2 54.3 NULL 1994- cc cc {"cc":null} [null] {"c1":1054,"c2":"1"} NULL +1055 -73 55 165000.0 16.5 55.3 NULL 1995- cd cd {"cd":null} [null] {"c1":1055,"c2":"1"} NULL +1056 -72 56 168000.0 16.8 56.3 NULL 1996- ce ce {"ce":null} [null] {"c1":1056,"c2":"1"} NULL +1057 -71 57 171000.0 17.1 57.3 NULL 1997- cf cf {"cf":null} [null] {"c1":1057,"c2":"1"} NULL +1058 -70 58 174000.0 17.4 58.3 NULL 1998- cg cg {"cg":null} [null] {"c1":1058,"c2":"1"} NULL +1059 -69 59 177000.0 17.7 59.3 NULL 1999- ch ch {"ch":null} [null] {"c1":1059,"c2":"1"} NULL +1060 -68 60 180000.0 18.0 60.3 NULL 2000- ci ci {"ci":null} [null] {"c1":1060,"c2":"1"} NULL +1061 -67 61 183000.0 18.3 61.3 NULL 2001- cj cj {"cj":null} [null] {"c1":1061,"c2":"1"} NULL +1062 -66 62 186000.0 18.6 62.3 NULL 2002- ck ck {"ck":null} [null] {"c1":1062,"c2":"1"} NULL +1063 -65 63 189000.0 18.9 63.3 NULL 2003- cl cl {"cl":null} [null] {"c1":1063,"c2":"1"} NULL +1064 -64 64 192000.0 19.2 64.3 NULL 2004- cm cm {"cm":null} [null] {"c1":1064,"c2":"1"} NULL +1065 -63 65 195000.0 19.5 65.3 NULL 2005- cn cn {"cn":null} [null] {"c1":1065,"c2":"1"} NULL +1066 -62 66 198000.0 19.8 66.3 NULL 2006- co co {"co":null} [null] {"c1":1066,"c2":"1"} NULL +1067 -61 67 201000.0 20.1 67.3 NULL 2007- cp cp {"cp":null} [null] {"c1":1067,"c2":"1"} NULL +1068 -60 68 204000.0 20.4 68.3 NULL 2008- cq cq {"cq":null} [null] {"c1":1068,"c2":"1"} NULL +1069 -59 69 207000.0 20.7 69.3 NULL 2009- cr cr {"cr":null} [null] {"c1":1069,"c2":"1"} NULL +1070 -58 70 210000.0 21.0 70.3 NULL 2010- cs cs {"cs":null} [null] {"c1":1070,"c2":"1"} NULL +1071 -57 71 213000.0 21.3 71.3 NULL 2011- ct ct {"ct":null} [null] {"c1":1071,"c2":"1"} NULL +1072 -56 72 216000.0 21.6 72.3 NULL 2012- cu cu {"cu":null} [null] {"c1":1072,"c2":"1"} NULL +1073 -55 73 
219000.0 21.9 73.3 NULL 2013- cv cv {"cv":null} [null] {"c1":1073,"c2":"1"} NULL +1074 -54 74 222000.0 22.2 74.3 NULL 2014- cw cw {"cw":null} [null] {"c1":1074,"c2":"1"} NULL +1075 -53 75 225000.0 22.5 75.3 NULL 2015- cx cx {"cx":null} [null] {"c1":1075,"c2":"1"} NULL +1076 -52 76 228000.0 22.8 76.3 NULL 2016- cy cy {"cy":null} [null] {"c1":1076,"c2":"1"} NULL +1077 -51 77 231000.0 23.1 77.3 NULL 2017- cz cz {"cz":null} [null] {"c1":1077,"c2":"1"} NULL +1078 -50 78 234000.0 23.4 78.3 NULL 2018- da da {"da":null} [null] {"c1":1078,"c2":"1"} NULL +1079 -49 79 237000.0 23.7 79.3 NULL 2019- db db {"db":null} [null] {"c1":1079,"c2":"1"} NULL +1080 -48 80 240000.0 24.0 80.3 NULL 2020- dc dc {"dc":null} [null] {"c1":1080,"c2":"1"} NULL +1081 -47 81 243000.0 24.3 81.3 NULL 2021- dd dd {"dd":null} [null] {"c1":1081,"c2":"1"} NULL +1082 -46 82 246000.0 24.6 82.3 NULL 2022- de de {"de":null} [null] {"c1":1082,"c2":"1"} NULL +1083 -45 83 249000.0 24.9 83.3 NULL 2023- df df {"df":null} [null] {"c1":1083,"c2":"1"} NULL +1084 -44 84 252000.0 25.2 84.3 NULL 2024- dg dg {"dg":null} [null] {"c1":1084,"c2":"1"} NULL +1085 -43 85 255000.0 25.5 85.3 NULL 2025- dh dh {"dh":null} [null] {"c1":1085,"c2":"1"} NULL +1086 -42 86 258000.0 25.8 86.3 NULL 2026- di di {"di":null} [null] {"c1":1086,"c2":"1"} NULL +1087 -41 87 261000.0 26.1 87.3 NULL 2027- dj dj {"dj":null} [null] {"c1":1087,"c2":"1"} NULL +1088 -40 88 264000.0 26.4 88.3 NULL 2028- dk dk {"dk":null} [null] {"c1":1088,"c2":"1"} NULL +1089 -39 89 267000.0 26.7 89.3 NULL 2029- dl dl {"dl":null} [null] {"c1":1089,"c2":"1"} NULL +1090 -38 90 270000.0 27.0 90.3 NULL 2030- dm dm {"dm":null} [null] {"c1":1090,"c2":"1"} NULL +1091 -37 91 273000.0 27.3 91.3 NULL 2031- dn dn {"dn":null} [null] {"c1":1091,"c2":"1"} NULL +1092 -36 92 276000.0 27.6 92.3 NULL 2032- do do {"do":null} [null] {"c1":1092,"c2":"1"} NULL +1093 -35 93 279000.0 27.9 93.3 NULL 2033- dp dp {"dp":null} [null] {"c1":1093,"c2":"1"} NULL +1094 -34 94 282000.0 28.2 94.3 NULL 2034- dq dq {"dq":null} [null] {"c1":1094,"c2":"1"} NULL +1095 -33 95 285000.0 28.5 95.3 NULL 2035- dr dr {"dr":null} [null] {"c1":1095,"c2":"1"} NULL +1096 -32 96 288000.0 28.8 96.3 NULL 2036- ds ds {"ds":null} [null] {"c1":1096,"c2":"1"} NULL +1097 -31 97 291000.0 29.1 97.3 NULL 2037- dt dt {"dt":null} [null] {"c1":1097,"c2":"1"} NULL +1098 -30 98 294000.0 29.4 98.3 NULL 2038- du du {"du":null} [null] {"c1":1098,"c2":"1"} NULL +1099 -29 99 297000.0 29.7 99.3 NULL 2039- dv dv {"dv":null} [null] {"c1":1099,"c2":"1"} NULL +1100 -28 100 300000.0 30.0 100.3 NULL 2040- dw dw {"dw":null} [null] {"c1":1100,"c2":"1"} NULL +1101 -27 101 303000.0 30.3 101.3 NULL 2041- dx dx {"dx":null} [null] {"c1":1101,"c2":"1"} NULL +1102 -26 102 306000.0 30.6 102.3 NULL 2042- dy dy {"dy":null} [null] {"c1":1102,"c2":"1"} NULL +1103 -25 103 309000.0 30.9 103.3 NULL 2043- dz dz {"dz":null} [null] {"c1":1103,"c2":"1"} NULL +1104 -24 104 312000.0 31.2 104.3 NULL 2044- ea ea {"ea":null} [null] {"c1":1104,"c2":"1"} NULL +1105 -23 105 315000.0 31.5 105.3 NULL 2045- eb eb {"eb":null} [null] {"c1":1105,"c2":"1"} NULL +1106 -22 106 318000.0 31.8 106.3 NULL 2046- ec ec {"ec":null} [null] {"c1":1106,"c2":"1"} NULL +1107 -21 107 321000.0 32.1 107.3 NULL 2047- ed ed {"ed":null} [null] {"c1":1107,"c2":"1"} NULL +1108 -20 108 324000.0 32.4 108.3 NULL 2048- ee ee {"ee":null} [null] {"c1":1108,"c2":"1"} NULL +1109 -19 109 327000.0 32.7 109.3 NULL 2049- ef ef {"ef":null} [null] {"c1":1109,"c2":"1"} NULL +1110 -18 110 330000.0 33.0 110.3 NULL 2050- eg eg {"eg":null} [null] 
{"c1":1110,"c2":"1"} NULL +1111 -17 111 333000.0 33.3 111.3 NULL 2051- eh eh {"eh":null} [null] {"c1":1111,"c2":"1"} NULL +1112 -16 112 336000.0 33.6 112.3 NULL 2052- ei ei {"ei":null} [null] {"c1":1112,"c2":"1"} NULL +1113 -15 113 339000.0 33.9 113.3 NULL 2053- ej ej {"ej":null} [null] {"c1":1113,"c2":"1"} NULL +1114 -14 114 342000.0 34.2 114.3 NULL 2054- ek ek {"ek":null} [null] {"c1":1114,"c2":"1"} NULL +1115 -13 115 345000.0 34.5 115.3 NULL 2055- el el {"el":null} [null] {"c1":1115,"c2":"1"} NULL +1116 -12 116 348000.0 34.8 116.3 NULL 2056- em em {"em":null} [null] {"c1":1116,"c2":"1"} NULL +1117 -11 117 351000.0 35.1 117.3 NULL 2057- en en {"en":null} [null] {"c1":1117,"c2":"1"} NULL +1118 -10 118 354000.0 35.4 118.3 NULL 2058- eo eo {"eo":null} [null] {"c1":1118,"c2":"1"} NULL +1119 -9 119 357000.0 35.7 119.3 NULL 2059- ep ep {"ep":null} [null] {"c1":1119,"c2":"1"} NULL +1120 -8 120 360000.0 36.0 120.3 NULL 2060- eq eq {"eq":null} [null] {"c1":1120,"c2":"1"} NULL +1121 -7 121 363000.0 36.3 121.3 NULL 2061- er er {"er":null} [null] {"c1":1121,"c2":"1"} NULL +1122 -6 122 366000.0 36.6 122.3 NULL 2062- es es {"es":null} [null] {"c1":1122,"c2":"1"} NULL +1123 -5 123 369000.0 36.9 123.3 NULL 2063- et et {"et":null} [null] {"c1":1123,"c2":"1"} NULL +1124 -4 124 372000.0 37.2 124.3 NULL 2064- eu eu {"eu":null} [null] {"c1":1124,"c2":"1"} NULL +1125 -3 125 375000.0 37.5 125.3 NULL 2065- ev ev {"ev":null} [null] {"c1":1125,"c2":"1"} NULL +1126 -2 126 378000.0 37.8 126.3 NULL 2066- ew ew {"ew":null} [null] {"c1":1126,"c2":"1"} NULL +1127 -1 127 381000.0 38.1 127.3 NULL 2067- ex ex {"ex":null} [null] {"c1":1127,"c2":"1"} NULL +1128 0 128 384000.0 38.4 128.3 NULL 2068- ey ey {"ey":null} [null] {"c1":1128,"c2":"1"} NULL +1129 1 129 387000.0 38.7 129.3 NULL 2069- ez ez {"ez":null} [null] {"c1":1129,"c2":"1"} NULL +1130 2 130 390000.0 39.0 130.3 NULL 2070- fa fa {"fa":null} [null] {"c1":1130,"c2":"1"} NULL +1131 3 131 393000.0 39.3 131.3 NULL 2071- fb fb {"fb":null} [null] {"c1":1131,"c2":"1"} NULL +1132 4 132 396000.0 39.6 132.3 NULL 2072- fc fc {"fc":null} [null] {"c1":1132,"c2":"1"} NULL +1133 5 133 399000.0 39.9 133.3 NULL 2073- fd fd {"fd":null} [null] {"c1":1133,"c2":"1"} NULL +1134 6 134 402000.0 40.2 134.3 NULL 2074- fe fe {"fe":null} [null] {"c1":1134,"c2":"1"} NULL +1135 7 135 405000.0 40.5 135.3 NULL 2075- ff ff {"ff":null} [null] {"c1":1135,"c2":"1"} NULL +1136 8 136 408000.0 40.8 136.3 NULL 2076- fg fg {"fg":null} [null] {"c1":1136,"c2":"1"} NULL +1137 9 137 411000.0 41.1 137.3 NULL 2077- fh fh {"fh":null} [null] {"c1":1137,"c2":"1"} NULL +1138 10 138 414000.0 41.4 138.3 NULL 2078- fi fi {"fi":null} [null] {"c1":1138,"c2":"1"} NULL +1139 11 139 417000.0 41.7 139.3 NULL 2079- fj fj {"fj":null} [null] {"c1":1139,"c2":"1"} NULL +1140 12 140 420000.0 42.0 140.3 NULL 2080- fk fk {"fk":null} [null] {"c1":1140,"c2":"1"} NULL +1141 13 141 423000.0 42.3 141.3 NULL 2081- fl fl {"fl":null} [null] {"c1":1141,"c2":"1"} NULL +1142 14 142 426000.0 42.6 142.3 NULL 2082- fm fm {"fm":null} [null] {"c1":1142,"c2":"1"} NULL +1143 15 143 429000.0 42.9 143.3 NULL 2083- fn fn {"fn":null} [null] {"c1":1143,"c2":"1"} NULL +1144 16 144 432000.0 43.2 144.3 NULL 2084- fo fo {"fo":null} [null] {"c1":1144,"c2":"1"} NULL +1145 17 145 435000.0 43.5 145.3 NULL 2085- fp fp {"fp":null} [null] {"c1":1145,"c2":"1"} NULL +1146 18 146 438000.0 43.8 146.3 NULL 2086- fq fq {"fq":null} [null] {"c1":1146,"c2":"1"} NULL +1147 19 147 441000.0 44.1 147.3 NULL 2087- fr fr {"fr":null} [null] {"c1":1147,"c2":"1"} NULL +1148 20 148 
444000.0 44.4 148.3 NULL 2088- fs fs {"fs":null} [null] {"c1":1148,"c2":"1"} NULL +1149 21 149 447000.0 44.7 149.3 NULL 2089- ft ft {"ft":null} [null] {"c1":1149,"c2":"1"} NULL +1150 22 150 450000.0 45.0 150.3 NULL 2090- fu fu {"fu":null} [null] {"c1":1150,"c2":"1"} NULL +1151 23 151 453000.0 45.3 151.3 NULL 2091- fv fv {"fv":null} [null] {"c1":1151,"c2":"1"} NULL +1152 24 152 456000.0 45.6 152.3 NULL 2092- fw fw {"fw":null} [null] {"c1":1152,"c2":"1"} NULL +1153 25 153 459000.0 45.9 153.3 NULL 2093- fx fx {"fx":null} [null] {"c1":1153,"c2":"1"} NULL +1154 26 154 462000.0 46.2 154.3 NULL 2094- fy fy {"fy":null} [null] {"c1":1154,"c2":"1"} NULL +1155 27 155 465000.0 46.5 155.3 NULL 2095- fz fz {"fz":null} [null] {"c1":1155,"c2":"1"} NULL +1156 28 156 468000.0 46.8 156.3 NULL 2096- ga ga {"ga":null} [null] {"c1":1156,"c2":"1"} NULL +1157 29 157 471000.0 47.1 157.3 NULL 2097- gb gb {"gb":null} [null] {"c1":1157,"c2":"1"} NULL +1158 30 158 474000.0 47.4 158.3 NULL 2098- gc gc {"gc":null} [null] {"c1":1158,"c2":"1"} NULL +1159 31 159 477000.0 47.7 159.3 NULL 2099- gd gd {"gd":null} [null] {"c1":1159,"c2":"1"} NULL +1160 32 160 480000.0 48.0 160.3 NULL 2100- ge ge {"ge":null} [null] {"c1":1160,"c2":"1"} NULL +1161 33 161 483000.0 48.3 161.3 NULL 2101- gf gf {"gf":null} [null] {"c1":1161,"c2":"1"} NULL +1162 34 162 486000.0 48.6 162.3 NULL 2102- gg gg {"gg":null} [null] {"c1":1162,"c2":"1"} NULL +1163 35 163 489000.0 48.9 163.3 NULL 2103- gh gh {"gh":null} [null] {"c1":1163,"c2":"1"} NULL +1164 36 164 492000.0 49.2 164.3 NULL 2104- gi gi {"gi":null} [null] {"c1":1164,"c2":"1"} NULL +1165 37 165 495000.0 49.5 165.3 NULL 2105- gj gj {"gj":null} [null] {"c1":1165,"c2":"1"} NULL +1166 38 166 498000.0 49.8 166.3 NULL 2106- gk gk {"gk":null} [null] {"c1":1166,"c2":"1"} NULL +1167 39 167 501000.0 50.1 167.3 NULL 2107- gl gl {"gl":null} [null] {"c1":1167,"c2":"1"} NULL +1168 40 168 504000.0 50.4 168.3 NULL 2108- gm gm {"gm":null} [null] {"c1":1168,"c2":"1"} NULL +1169 41 169 507000.0 50.7 169.3 NULL 2109- gn gn {"gn":null} [null] {"c1":1169,"c2":"1"} NULL +1170 42 170 510000.0 51.0 170.3 NULL 2110- go go {"go":null} [null] {"c1":1170,"c2":"1"} NULL +1171 43 171 513000.0 51.3 171.3 NULL 2111- gp gp {"gp":null} [null] {"c1":1171,"c2":"1"} NULL +1172 44 172 516000.0 51.6 172.3 NULL 2112- gq gq {"gq":null} [null] {"c1":1172,"c2":"1"} NULL +1173 45 173 519000.0 51.9 173.3 NULL 2113- gr gr {"gr":null} [null] {"c1":1173,"c2":"1"} NULL +1174 46 174 522000.0 52.2 174.3 NULL 2114- gs gs {"gs":null} [null] {"c1":1174,"c2":"1"} NULL +1175 47 175 525000.0 52.5 175.3 NULL 2115- gt gt {"gt":null} [null] {"c1":1175,"c2":"1"} NULL +1176 48 176 528000.0 52.8 176.3 NULL 2116- gu gu {"gu":null} [null] {"c1":1176,"c2":"1"} NULL +1177 49 177 531000.0 53.1 177.3 NULL 2117- gv gv {"gv":null} [null] {"c1":1177,"c2":"1"} NULL +1178 50 178 534000.0 53.4 178.3 NULL 2118- gw gw {"gw":null} [null] {"c1":1178,"c2":"1"} NULL +1179 51 179 537000.0 53.7 179.3 NULL 2119- gx gx {"gx":null} [null] {"c1":1179,"c2":"1"} NULL +1180 52 180 540000.0 54.0 180.3 NULL 2120- gy gy {"gy":null} [null] {"c1":1180,"c2":"1"} NULL +1181 53 181 543000.0 54.3 181.3 NULL 2121- gz gz {"gz":null} [null] {"c1":1181,"c2":"1"} NULL +1182 54 182 546000.0 54.6 182.3 NULL 2122- ha ha {"ha":null} [null] {"c1":1182,"c2":"1"} NULL +1183 55 183 549000.0 54.9 183.3 NULL 2123- hb hb {"hb":null} [null] {"c1":1183,"c2":"1"} NULL +1184 56 184 552000.0 55.2 184.3 NULL 2124- hc hc {"hc":null} [null] {"c1":1184,"c2":"1"} NULL +1185 57 185 555000.0 55.5 185.3 NULL 2125- hd hd 
{"hd":null} [null] {"c1":1185,"c2":"1"} NULL +1186 58 186 558000.0 55.8 186.3 NULL 2126- he he {"he":null} [null] {"c1":1186,"c2":"1"} NULL +1187 59 187 561000.0 56.1 187.3 NULL 2127- hf hf {"hf":null} [null] {"c1":1187,"c2":"1"} NULL +1188 60 188 564000.0 56.4 188.3 NULL 2128- hg hg {"hg":null} [null] {"c1":1188,"c2":"1"} NULL +1189 61 189 567000.0 56.7 189.3 NULL 2129- hh hh {"hh":null} [null] {"c1":1189,"c2":"1"} NULL +1190 62 190 570000.0 57.0 190.3 NULL 2130- hi hi {"hi":null} [null] {"c1":1190,"c2":"1"} NULL +1191 63 191 573000.0 57.3 191.3 NULL 2131- hj hj {"hj":null} [null] {"c1":1191,"c2":"1"} NULL +1192 64 192 576000.0 57.6 192.3 NULL 2132- hk hk {"hk":null} [null] {"c1":1192,"c2":"1"} NULL +1193 65 193 579000.0 57.9 193.3 NULL 2133- hl hl {"hl":null} [null] {"c1":1193,"c2":"1"} NULL +1194 66 194 582000.0 58.2 194.3 NULL 2134- hm hm {"hm":null} [null] {"c1":1194,"c2":"1"} NULL +1195 67 195 585000.0 58.5 195.3 NULL 2135- hn hn {"hn":null} [null] {"c1":1195,"c2":"1"} NULL +1196 68 196 588000.0 58.8 196.3 NULL 2136- ho ho {"ho":null} [null] {"c1":1196,"c2":"1"} NULL +1197 69 197 591000.0 59.1 197.3 NULL 2137- hp hp {"hp":null} [null] {"c1":1197,"c2":"1"} NULL +1198 70 198 594000.0 59.4 198.3 NULL 2138- hq hq {"hq":null} [null] {"c1":1198,"c2":"1"} NULL +1199 71 199 597000.0 59.7 199.3 NULL 2139- hr hr {"hr":null} [null] {"c1":1199,"c2":"1"} NULL +1200 72 200 600000.0 60.0 200.3 NULL 2140- hs hs {"hs":null} [null] {"c1":1200,"c2":"1"} NULL +1201 73 201 603000.0 60.3 201.3 NULL 2141- ht ht {"ht":null} [null] {"c1":1201,"c2":"1"} NULL +1202 74 202 606000.0 60.6 202.3 NULL 2142- hu hu {"hu":null} [null] {"c1":1202,"c2":"1"} NULL +1203 75 203 609000.0 60.9 203.3 NULL 2143- hv hv {"hv":null} [null] {"c1":1203,"c2":"1"} NULL +1204 76 204 612000.0 61.2 204.3 NULL 2144- hw hw {"hw":null} [null] {"c1":1204,"c2":"1"} NULL +1205 77 205 615000.0 61.5 205.3 NULL 2145- hx hx {"hx":null} [null] {"c1":1205,"c2":"1"} NULL +1206 78 206 618000.0 61.8 206.3 NULL 2146- hy hy {"hy":null} [null] {"c1":1206,"c2":"1"} NULL +1207 79 207 621000.0 62.1 207.3 NULL 2147- hz hz {"hz":null} [null] {"c1":1207,"c2":"1"} NULL +1208 80 208 624000.0 62.4 208.3 NULL 2148- ia ia {"ia":null} [null] {"c1":1208,"c2":"1"} NULL +1209 81 209 627000.0 62.7 209.3 NULL 2149- ib ib {"ib":null} [null] {"c1":1209,"c2":"1"} NULL +1210 82 210 630000.0 63.0 210.3 NULL 2150- ic ic {"ic":null} [null] {"c1":1210,"c2":"1"} NULL +1211 83 211 633000.0 63.3 211.3 NULL 2151- id id {"id":null} [null] {"c1":1211,"c2":"1"} NULL +1212 84 212 636000.0 63.6 212.3 NULL 2152- ie ie {"ie":null} [null] {"c1":1212,"c2":"1"} NULL +1213 85 213 639000.0 63.9 213.3 NULL 2153- if if {"if":null} [null] {"c1":1213,"c2":"1"} NULL +1214 86 214 642000.0 64.2 214.3 NULL 2154- ig ig {"ig":null} [null] {"c1":1214,"c2":"1"} NULL +1215 87 215 645000.0 64.5 215.3 NULL 2155- ih ih {"ih":null} [null] {"c1":1215,"c2":"1"} NULL +1216 88 216 648000.0 64.8 216.3 NULL 2156- ii ii {"ii":null} [null] {"c1":1216,"c2":"1"} NULL +1217 89 217 651000.0 65.1 217.3 NULL 2157- ij ij {"ij":null} [null] {"c1":1217,"c2":"1"} NULL +1218 90 218 654000.0 65.4 218.3 NULL 2158- ik ik {"ik":null} [null] {"c1":1218,"c2":"1"} NULL +1219 91 219 657000.0 65.7 219.3 NULL 2159- il il {"il":null} [null] {"c1":1219,"c2":"1"} NULL +1220 92 220 660000.0 66.0 220.3 NULL 2160- im im {"im":null} [null] {"c1":1220,"c2":"1"} NULL +1221 93 221 663000.0 66.3 221.3 NULL 2161- in in {"in":null} [null] {"c1":1221,"c2":"1"} NULL +1222 94 222 666000.0 66.6 222.3 NULL 2162- io io {"io":null} [null] {"c1":1222,"c2":"1"} 
NULL +1223 95 223 669000.0 66.9 223.3 NULL 2163- ip ip {"ip":null} [null] {"c1":1223,"c2":"1"} NULL +1224 96 224 672000.0 67.2 224.3 NULL 2164- iq iq {"iq":null} [null] {"c1":1224,"c2":"1"} NULL +1225 97 225 675000.0 67.5 225.3 NULL 2165- ir ir {"ir":null} [null] {"c1":1225,"c2":"1"} NULL +1226 98 226 678000.0 67.8 226.3 NULL 2166- is is {"is":null} [null] {"c1":1226,"c2":"1"} NULL +1227 99 227 681000.0 68.1 227.3 NULL 2167- it it {"it":null} [null] {"c1":1227,"c2":"1"} NULL +1228 100 228 684000.0 68.4 228.3 NULL 2168- iu iu {"iu":null} [null] {"c1":1228,"c2":"1"} NULL +1229 101 229 687000.0 68.7 229.3 NULL 2169- iv iv {"iv":null} [null] {"c1":1229,"c2":"1"} NULL +1230 102 230 690000.0 69.0 230.3 NULL 2170- iw iw {"iw":null} [null] {"c1":1230,"c2":"1"} NULL +1231 103 231 693000.0 69.3 231.3 NULL 2171- ix ix {"ix":null} [null] {"c1":1231,"c2":"1"} NULL +1232 104 232 696000.0 69.6 232.3 NULL 2172- iy iy {"iy":null} [null] {"c1":1232,"c2":"1"} NULL +1233 105 233 699000.0 69.9 233.3 NULL 2173- iz iz {"iz":null} [null] {"c1":1233,"c2":"1"} NULL +1234 106 234 702000.0 70.2 234.3 NULL 2174- ja ja {"ja":null} [null] {"c1":1234,"c2":"1"} NULL +1235 107 235 705000.0 70.5 235.3 NULL 2175- jb jb {"jb":null} [null] {"c1":1235,"c2":"1"} NULL +1236 108 236 708000.0 70.8 236.3 NULL 2176- jc jc {"jc":null} [null] {"c1":1236,"c2":"1"} NULL +1237 109 237 711000.0 71.1 237.3 NULL 2177- jd jd {"jd":null} [null] {"c1":1237,"c2":"1"} NULL +1238 110 238 714000.0 71.4 238.3 NULL 2178- je je {"je":null} [null] {"c1":1238,"c2":"1"} NULL +1239 111 239 717000.0 71.7 239.3 NULL 2179- jf jf {"jf":null} [null] {"c1":1239,"c2":"1"} NULL +1240 112 240 720000.0 72.0 240.3 NULL 2180- jg jg {"jg":null} [null] {"c1":1240,"c2":"1"} NULL +1241 113 241 723000.0 72.3 241.3 NULL 2181- jh jh {"jh":null} [null] {"c1":1241,"c2":"1"} NULL +1242 114 242 726000.0 72.6 242.3 NULL 2182- ji ji {"ji":null} [null] {"c1":1242,"c2":"1"} NULL +1243 115 243 729000.0 72.9 243.3 NULL 2183- jj jj {"jj":null} [null] {"c1":1243,"c2":"1"} NULL +1244 116 244 732000.0 73.2 244.3 NULL 2184- jk jk {"jk":null} [null] {"c1":1244,"c2":"1"} NULL +1245 117 245 735000.0 73.5 245.3 NULL 2185- jl jl {"jl":null} [null] {"c1":1245,"c2":"1"} NULL +1246 118 246 738000.0 73.8 246.3 NULL 2186- jm jm {"jm":null} [null] {"c1":1246,"c2":"1"} NULL +1247 119 247 741000.0 74.1 247.3 NULL 2187- jn jn {"jn":null} [null] {"c1":1247,"c2":"1"} NULL +1248 120 248 744000.0 74.4 248.3 NULL 2188- jo jo {"jo":null} [null] {"c1":1248,"c2":"1"} NULL +1249 121 249 747000.0 74.7 249.3 NULL 2189- jp jp {"jp":null} [null] {"c1":1249,"c2":"1"} NULL +1250 122 250 750000.0 75.0 250.3 NULL 2190- jq jq {"jq":null} [null] {"c1":1250,"c2":"1"} NULL +1251 123 251 753000.0 75.3 251.3 NULL 2191- jr jr {"jr":null} [null] {"c1":1251,"c2":"1"} NULL +1252 124 252 756000.0 75.6 252.3 NULL 2192- js js {"js":null} [null] {"c1":1252,"c2":"1"} NULL +1253 125 253 759000.0 75.9 253.3 NULL 2193- jt jt {"jt":null} [null] {"c1":1253,"c2":"1"} NULL +1254 126 254 762000.0 76.2 254.3 NULL 2194- ju ju {"ju":null} [null] {"c1":1254,"c2":"1"} NULL +1255 127 255 765000.0 76.5 255.3 NULL 2195- jv jv {"jv":null} [null] {"c1":1255,"c2":"1"} NULL +1256 -128 256 768000.0 76.8 256.3 NULL 2196- jw jw {"jw":null} [null] {"c1":1256,"c2":"1"} NULL +1257 -127 257 771000.0 77.1 257.3 NULL 2197- jx jx {"jx":null} [null] {"c1":1257,"c2":"1"} NULL +1258 -126 258 774000.0 77.4 258.3 NULL 2198- jy jy {"jy":null} [null] {"c1":1258,"c2":"1"} NULL +1259 -125 259 777000.0 77.7 259.3 NULL 2199- jz jz {"jz":null} [null] {"c1":1259,"c2":"1"} 
NULL +1260 -124 260 780000.0 78.0 260.3 NULL 2200- ka ka {"ka":null} [null] {"c1":1260,"c2":"1"} NULL +1261 -123 261 783000.0 78.3 261.3 NULL 2201- kb kb {"kb":null} [null] {"c1":1261,"c2":"1"} NULL +1262 -122 262 786000.0 78.6 262.3 NULL 2202- kc kc {"kc":null} [null] {"c1":1262,"c2":"1"} NULL +1263 -121 263 789000.0 78.9 263.3 NULL 2203- kd kd {"kd":null} [null] {"c1":1263,"c2":"1"} NULL +1264 -120 264 792000.0 79.2 264.3 NULL 2204- ke ke {"ke":null} [null] {"c1":1264,"c2":"1"} NULL +1265 -119 265 795000.0 79.5 265.3 NULL 2205- kf kf {"kf":null} [null] {"c1":1265,"c2":"1"} NULL +1266 -118 266 798000.0 79.8 266.3 NULL 2206- kg kg {"kg":null} [null] {"c1":1266,"c2":"1"} NULL +1267 -117 267 801000.0 80.1 267.3 NULL 2207- kh kh {"kh":null} [null] {"c1":1267,"c2":"1"} NULL +1268 -116 268 804000.0 80.4 268.3 NULL 2208- ki ki {"ki":null} [null] {"c1":1268,"c2":"1"} NULL +1269 -115 269 807000.0 80.7 269.3 NULL 2209- kj kj {"kj":null} [null] {"c1":1269,"c2":"1"} NULL +1270 -114 270 810000.0 81.0 270.3 NULL 2210- kk kk {"kk":null} [null] {"c1":1270,"c2":"1"} NULL +1271 -113 271 813000.0 81.3 271.3 NULL 2211- kl kl {"kl":null} [null] {"c1":1271,"c2":"1"} NULL +1272 -112 272 816000.0 81.6 272.3 NULL 2212- km km {"km":null} [null] {"c1":1272,"c2":"1"} NULL +1273 -111 273 819000.0 81.9 273.3 NULL 2213- kn kn {"kn":null} [null] {"c1":1273,"c2":"1"} NULL +1274 -110 274 822000.0 82.2 274.3 NULL 2214- ko ko {"ko":null} [null] {"c1":1274,"c2":"1"} NULL +1275 -109 275 825000.0 82.5 275.3 NULL 2215- kp kp {"kp":null} [null] {"c1":1275,"c2":"1"} NULL +1276 -108 276 828000.0 82.8 276.3 NULL 2216- kq kq {"kq":null} [null] {"c1":1276,"c2":"1"} NULL +1277 -107 277 831000.0 83.1 277.3 NULL 2217- kr kr {"kr":null} [null] {"c1":1277,"c2":"1"} NULL +1278 -106 278 834000.0 83.4 278.3 NULL 2218- ks ks {"ks":null} [null] {"c1":1278,"c2":"1"} NULL +1279 -105 279 837000.0 83.7 279.3 NULL 2219- kt kt {"kt":null} [null] {"c1":1279,"c2":"1"} NULL +1280 -104 280 840000.0 84.0 280.3 NULL 2220- ku ku {"ku":null} [null] {"c1":1280,"c2":"1"} NULL +1281 -103 281 843000.0 84.3 281.3 NULL 2221- kv kv {"kv":null} [null] {"c1":1281,"c2":"1"} NULL +1282 -102 282 846000.0 84.6 282.3 NULL 2222- kw kw {"kw":null} [null] {"c1":1282,"c2":"1"} NULL +1283 -101 283 849000.0 84.9 283.3 NULL 2223- kx kx {"kx":null} [null] {"c1":1283,"c2":"1"} NULL +1284 -100 284 852000.0 85.2 284.3 NULL 2224- ky ky {"ky":null} [null] {"c1":1284,"c2":"1"} NULL +1285 -99 285 855000.0 85.5 285.3 NULL 2225- kz kz {"kz":null} [null] {"c1":1285,"c2":"1"} NULL +1286 -98 286 858000.0 85.8 286.3 NULL 2226- la la {"la":null} [null] {"c1":1286,"c2":"1"} NULL +1287 -97 287 861000.0 86.1 287.3 NULL 2227- lb lb {"lb":null} [null] {"c1":1287,"c2":"1"} NULL +1288 -96 288 864000.0 86.4 288.3 NULL 2228- lc lc {"lc":null} [null] {"c1":1288,"c2":"1"} NULL +1289 -95 289 867000.0 86.7 289.3 NULL 2229- ld ld {"ld":null} [null] {"c1":1289,"c2":"1"} NULL +1290 -94 290 870000.0 87.0 290.3 NULL 2230- le le {"le":null} [null] {"c1":1290,"c2":"1"} NULL +1291 -93 291 873000.0 87.3 291.3 NULL 2231- lf lf {"lf":null} [null] {"c1":1291,"c2":"1"} NULL +1292 -92 292 876000.0 87.6 292.3 NULL 2232- lg lg {"lg":null} [null] {"c1":1292,"c2":"1"} NULL +1293 -91 293 879000.0 87.9 293.3 NULL 2233- lh lh {"lh":null} [null] {"c1":1293,"c2":"1"} NULL +1294 -90 294 882000.0 88.2 294.3 NULL 2234- li li {"li":null} [null] {"c1":1294,"c2":"1"} NULL +1295 -89 295 885000.0 88.5 295.3 NULL 2235- lj lj {"lj":null} [null] {"c1":1295,"c2":"1"} NULL +1296 -88 296 888000.0 88.8 296.3 NULL 2236- lk lk {"lk":null} 
[null] {"c1":1296,"c2":"1"} NULL +1297 -87 297 891000.0 89.1 297.3 NULL 2237- ll ll {"ll":null} [null] {"c1":1297,"c2":"1"} NULL +1298 -86 298 894000.0 89.4 298.3 NULL 2238- lm lm {"lm":null} [null] {"c1":1298,"c2":"1"} NULL +1299 -85 299 897000.0 89.7 299.3 NULL 2239- ln ln {"ln":null} [null] {"c1":1299,"c2":"1"} NULL +PREHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types_staging +PREHOOK: Output: default@parquet_types +POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types +SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar, +unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types_staging +POSTHOOK: Output: default@parquet_types +POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ] +POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ] +POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ] +POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ] +POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ] +POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ] +POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ] +POSTHOOK: Lineage: parquet_types.d SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:d, type:date, comment:null), ] +POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ] +POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,varchar(3)>, comment:null), ] +POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ] +POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ] +PREHOOK: query: -- test types in group by + +EXPLAIN SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +POSTHOOK: query: -- test types in group by + +EXPLAIN SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY
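The plan that follows splits the aggregation into two MapReduce stages. It can do so because AVG and STDDEV_POP are algebraic aggregates: the vectorized map side emits a compact partial state per group key (count/sum for AVG; count/sum/variance for STDDEV_POP, carried as the struct-typed value columns in the Reduce Output Operator), and the reducer combines those partials in mergepartial mode. As a minimal illustrative sketch (not part of the recorded golden output; it assumes only the parquet_types table created above, and the column aliases are hypothetical), the same figures can be recomputed in HiveQL from plain SUM/COUNT partials via the population identity stddev_pop(x) = sqrt(E[x^2] - E[x]^2):

-- Illustrative only: recompute AVG and STDDEV_POP from mergeable SUM/COUNT partials.
SELECT ctinyint,
       SUM(cfloat) / COUNT(cfloat) AS avg_manual,      -- AVG = sum / count
       SQRT(SUM(cdouble * cdouble) / COUNT(cdouble)    -- E[x^2]
            - POW(SUM(cdouble) / COUNT(cdouble), 2))   -- minus E[x]^2
         AS stddev_pop_manual
FROM parquet_types
GROUP BY ctinyint
ORDER BY ctinyint;

For example, the ctinyint = -128 group holds the cdouble values 0.0 and 76.8, so stddev_pop_manual = sqrt((0.0 + 76.8*76.8)/2 - 38.4*38.4) = 38.4, matching the first row of the expected result set further down.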
+STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double) + outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble) + keys: ctinyint (type: tinyint) + mode: hash + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Map-reduce partition columns: _col0 (type: tinyint) + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4) + keys: KEY._col0 (type: tinyint) + mode: mergepartial + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: tinyint) + sort order: + + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double) + outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), +
ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT ctinyint, + MAX(cint), + MIN(csmallint), + COUNT(cstring1), + ROUND(AVG(cfloat), 5), + ROUND(STDDEV_POP(cdouble),5) +FROM parquet_types +GROUP BY ctinyint +ORDER BY ctinyint +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +-128 1256 0 2 384000.0 38.4 +-127 1257 1 2 387000.0 38.4 +-126 1258 2 2 390000.0 38.4 +-125 1259 3 2 393000.0 38.4 +-124 1260 4 2 396000.0 38.4 +-123 1261 5 2 399000.0 38.4 +-122 1262 6 2 402000.0 38.4 +-121 1263 7 2 405000.0 38.4 +-120 1264 8 2 408000.0 38.4 +-119 1265 9 2 411000.0 38.4 +-118 1266 10 2 414000.0 38.4 +-117 1267 11 2 417000.0 38.4 +-116 1268 12 2 420000.0 38.4 +-115 1269 13 2 423000.0 38.4 +-114 1270 14 2 426000.0 38.4 +-113 1271 15 2 429000.0 38.4 +-112 1272 16 2 432000.0 38.4 +-111 1273 17 2 435000.0 38.4 +-110 1274 18 2 438000.0 38.4 +-109 1275 19 2 441000.0 38.4 +-108 1276 20 2 444000.0 38.4 +-107 1277 21 2 447000.0 38.4 +-106 1278 22 2 450000.0 38.4 +-105 1279 23 2 453000.0 38.4 +-104 1280 24 2 456000.0 38.4 +-103 1281 25 2 459000.0 38.4 +-102 1282 26 2 462000.0 38.4 +-101 1283 27 2 465000.0 38.4 +-100 1284 28 2 468000.0 38.4 +-99 1285 29 2 471000.0 38.4 +-98 1286 30 2 474000.0 38.4 +-97 1287 31 2 477000.0 38.4 +-96 1288 32 2 480000.0 38.4 +-95 1289 33 2 483000.0 38.4 +-94 1290 34 2 486000.0 38.4 +-93 1291 35 2 489000.0 38.4 +-92 1292 36 2 492000.0 38.4 +-91 1293 37 2 495000.0 38.4 +-90 1294 38 2 498000.0 38.4 +-89 1295 39 2 501000.0 38.4 +-88 1296 40 2 504000.0 38.4 +-87 1297 41 2 507000.0 38.4 +-86 1298 42 2 510000.0 38.4 +-85 1299 43 2 513000.0 38.4 +-84 1044 44 1 132000.0 0.0 +-83 1045 45 1 135000.0 0.0 +-82 1046 46 1 138000.0 0.0 +-81 1047 47 1 141000.0 0.0 +-80 1048 48 1 144000.0 0.0 +-79 1049 49 1 147000.0 0.0 +-78 1050 50 1 150000.0 0.0 +-77 1051 51 1 153000.0 0.0 +-76 1052 52 1 156000.0 0.0 +-75 1053 53 1 159000.0 0.0 +-74 1054 54 1 162000.0 0.0 +-73 1055 55 1 165000.0 0.0 +-72 1056 56 1 168000.0 0.0 +-71 1057 57 1 171000.0 0.0 +-70 1058 58 1 174000.0 0.0 +-69 1059 59 1 177000.0 0.0 +-68 1060 60 1 180000.0 0.0 +-67 1061 61 1 183000.0 0.0 +-66 1062 62 1 186000.0 0.0 +-65 1063 63 1 189000.0 0.0 +-64 1064 64 1 192000.0 0.0 +-63 1065 65 1 195000.0 0.0 +-62 1066 66 1 198000.0 0.0 +-61 1067 67 1 201000.0 0.0 +-60 1068 68 1 204000.0 0.0 +-59 1069 69 1 207000.0 0.0 +-58 1070 70 1 210000.0 0.0 +-57 1071 71 1 213000.0 0.0 +-56 1072 72 1 216000.0 0.0 +-55 1073 73 1 219000.0 0.0 +-54 1074 74 1 222000.0 0.0 +-53 1075 75 1 225000.0 0.0 +-52 1076 76 1 228000.0 0.0 +-51 1077 77 1 231000.0 0.0 +-50 1078 78 1 234000.0 0.0 +-49 1079 79 1 237000.0 0.0 +-48 1080 80 1 240000.0 0.0 +-47 1081 81 1 243000.0 0.0 +-46 1082 82 1 246000.0 0.0 +-45 1083 83 1 249000.0 0.0 +-44 1084 84 1 252000.0 0.0 +-43 1085 85 1 255000.0 0.0 +-42 1086 86 1 258000.0 0.0 +-41 1087 87 1 261000.0 0.0 +-40 1088 88 1 264000.0 0.0 +-39 1089 89 1 267000.0 0.0 +-38 1090 90 1 270000.0 0.0 +-37 1091 91 1 273000.0 0.0 +-36 1092 92 1 276000.0 0.0 +-35 1093 93 1 279000.0 0.0 +-34 1094 94 1 282000.0 0.0 +-33 1095 95 1 285000.0 0.0 +-32 1096 96 1 288000.0 0.0 +-31 1097 97 1 291000.0 0.0 +-30 1098 98 1 294000.0 0.0 +-29 1099 99 1 297000.0 0.0 +-28 1100 100 1 300000.0 0.0 +-27 1101 101 1 303000.0 0.0 +-26 1102 102 1 306000.0 0.0 +-25 1103 103 1 309000.0 0.0 +-24 1104 104 1 312000.0 0.0 +-23 1105 105 1 315000.0 0.0 +-22 1106 106 1 318000.0 0.0 +-21 
1107 107 1 321000.0 0.0 +-20 1108 108 1 324000.0 0.0 +-19 1109 109 1 327000.0 0.0 +-18 1110 110 1 330000.0 0.0 +-17 1111 111 1 333000.0 0.0 +-16 1112 112 1 336000.0 0.0 +-15 1113 113 1 339000.0 0.0 +-14 1114 114 1 342000.0 0.0 +-13 1115 115 1 345000.0 0.0 +-12 1116 116 1 348000.0 0.0 +-11 1117 117 1 351000.0 0.0 +-10 1118 118 1 354000.0 0.0 +-9 1119 119 1 357000.0 0.0 +-8 1120 120 1 360000.0 0.0 +-7 1121 121 1 363000.0 0.0 +-6 1122 122 1 366000.0 0.0 +-5 1123 123 1 369000.0 0.0 +-4 1124 124 1 372000.0 0.0 +-3 1125 125 1 375000.0 0.0 +-2 1126 126 1 378000.0 0.0 +-1 1127 127 1 381000.0 0.0 +0 1128 128 1 384000.0 0.0 +1 1129 129 1 387000.0 0.0 +2 1130 130 1 390000.0 0.0 +3 1131 131 1 393000.0 0.0 +4 1132 132 1 396000.0 0.0 +5 1133 133 1 399000.0 0.0 +6 1134 134 1 402000.0 0.0 +7 1135 135 1 405000.0 0.0 +8 1136 136 1 408000.0 0.0 +9 1137 137 1 411000.0 0.0 +10 1138 138 1 414000.0 0.0 +11 1139 139 1 417000.0 0.0 +12 1140 140 1 420000.0 0.0 +13 1141 141 1 423000.0 0.0 +14 1142 142 1 426000.0 0.0 +15 1143 143 1 429000.0 0.0 +16 1144 144 1 432000.0 0.0 +17 1145 145 1 435000.0 0.0 +18 1146 146 1 438000.0 0.0 +19 1147 147 1 441000.0 0.0 +20 1148 148 1 444000.0 0.0 +21 1149 149 1 447000.0 0.0 +22 1150 150 1 450000.0 0.0 +23 1151 151 1 453000.0 0.0 +24 1152 152 1 456000.0 0.0 +25 1153 153 1 459000.0 0.0 +26 1154 154 1 462000.0 0.0 +27 1155 155 1 465000.0 0.0 +28 1156 156 1 468000.0 0.0 +29 1157 157 1 471000.0 0.0 +30 1158 158 1 474000.0 0.0 +31 1159 159 1 477000.0 0.0 +32 1160 160 1 480000.0 0.0 +33 1161 161 1 483000.0 0.0 +34 1162 162 1 486000.0 0.0 +35 1163 163 1 489000.0 0.0 +36 1164 164 1 492000.0 0.0 +37 1165 165 1 495000.0 0.0 +38 1166 166 1 498000.0 0.0 +39 1167 167 1 501000.0 0.0 +40 1168 168 1 504000.0 0.0 +41 1169 169 1 507000.0 0.0 +42 1170 170 1 510000.0 0.0 +43 1171 171 1 513000.0 0.0 +44 1172 172 1 516000.0 0.0 +45 1173 173 1 519000.0 0.0 +46 1174 174 1 522000.0 0.0 +47 1175 175 1 525000.0 0.0 +48 1176 176 1 528000.0 0.0 +49 1177 177 1 531000.0 0.0 +50 1178 178 1 534000.0 0.0 +51 1179 179 1 537000.0 0.0 +52 1180 180 1 540000.0 0.0 +53 1181 181 1 543000.0 0.0 +54 1182 182 1 546000.0 0.0 +55 1183 183 1 549000.0 0.0 +56 1184 184 1 552000.0 0.0 +57 1185 185 1 555000.0 0.0 +58 1186 186 1 558000.0 0.0 +59 1187 187 1 561000.0 0.0 +60 1188 188 1 564000.0 0.0 +61 1189 189 1 567000.0 0.0 +62 1190 190 1 570000.0 0.0 +63 1191 191 1 573000.0 0.0 +64 1192 192 1 576000.0 0.0 +65 1193 193 1 579000.0 0.0 +66 1194 194 1 582000.0 0.0 +67 1195 195 1 585000.0 0.0 +68 1196 196 1 588000.0 0.0 +69 1197 197 1 591000.0 0.0 +70 1198 198 1 594000.0 0.0 +71 1199 199 1 597000.0 0.0 +72 1200 200 1 600000.0 0.0 +73 1201 201 1 603000.0 0.0 +74 1202 202 1 606000.0 0.0 +75 1203 203 1 609000.0 0.0 +76 1204 204 1 612000.0 0.0 +77 1205 205 1 615000.0 0.0 +78 1206 206 1 618000.0 0.0 +79 1207 207 1 621000.0 0.0 +80 1208 208 1 624000.0 0.0 +81 1209 209 1 627000.0 0.0 +82 1210 210 1 630000.0 0.0 +83 1211 211 1 633000.0 0.0 +84 1212 212 1 636000.0 0.0 +85 1213 213 1 639000.0 0.0 +86 1214 214 1 642000.0 0.0 +87 1215 215 1 645000.0 0.0 +88 1216 216 1 648000.0 0.0 +89 1217 217 1 651000.0 0.0 +90 1218 218 1 654000.0 0.0 +91 1219 219 1 657000.0 0.0 +92 1220 220 1 660000.0 0.0 +93 1221 221 1 663000.0 0.0 +94 1222 222 1 666000.0 0.0 +95 1223 223 1 669000.0 0.0 +96 1224 224 1 672000.0 0.0 +97 1225 225 1 675000.0 0.0 +98 1226 226 1 678000.0 0.0 +99 1227 227 1 681000.0 0.0 +100 1228 228 1 684000.0 0.0 +101 1229 229 1 687000.0 0.0 +102 1230 230 1 690000.0 0.0 +103 1231 231 1 693000.0 0.0 +104 1232 232 1 696000.0 0.0 +105 1233 233 1 699000.0 
0.0 +106 1234 234 1 702000.0 0.0 +107 1235 235 1 705000.0 0.0 +108 1236 236 1 708000.0 0.0 +109 1237 237 1 711000.0 0.0 +110 1238 238 1 714000.0 0.0 +111 1239 239 1 717000.0 0.0 +112 1240 240 1 720000.0 0.0 +113 1241 241 1 723000.0 0.0 +114 1242 242 1 726000.0 0.0 +115 1243 243 1 729000.0 0.0 +116 1244 244 1 732000.0 0.0 +117 1245 245 1 735000.0 0.0 +118 1246 246 1 738000.0 0.0 +119 1247 247 1 741000.0 0.0 +120 1248 248 1 744000.0 0.0 +121 1249 249 1 747000.0 0.0 +122 1250 250 1 750000.0 0.0 +123 1251 251 1 753000.0 0.0 +124 1252 252 1 756000.0 0.0 +125 1253 253 1 759000.0 0.0 +126 1254 254 1 762000.0 0.0 +127 1255 255 1 765000.0 0.0 +PREHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cfloat (type: float) + outputColumnNames: cfloat + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cfloat (type: float) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Map-reduce partition columns: _col0 (type: float) + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: float) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: float) + sort order: + + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cfloat, count(*) 
FROM parquet_types GROUP BY cfloat ORDER BY cfloat +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +0.0 1 +3000.0 1 +6000.0 1 +9000.0 1 +12000.0 1 +15000.0 1 +18000.0 1 +21000.0 1 +24000.0 1 +27000.0 1 +30000.0 1 +33000.0 1 +36000.0 1 +39000.0 1 +42000.0 1 +45000.0 1 +48000.0 1 +51000.0 1 +54000.0 1 +57000.0 1 +60000.0 1 +63000.0 1 +66000.0 1 +69000.0 1 +72000.0 1 +75000.0 1 +78000.0 1 +81000.0 1 +84000.0 1 +87000.0 1 +90000.0 1 +93000.0 1 +96000.0 1 +99000.0 1 +102000.0 1 +105000.0 1 +108000.0 1 +111000.0 1 +114000.0 1 +117000.0 1 +120000.0 1 +123000.0 1 +126000.0 1 +129000.0 1 +132000.0 1 +135000.0 1 +138000.0 1 +141000.0 1 +144000.0 1 +147000.0 1 +150000.0 1 +153000.0 1 +156000.0 1 +159000.0 1 +162000.0 1 +165000.0 1 +168000.0 1 +171000.0 1 +174000.0 1 +177000.0 1 +180000.0 1 +183000.0 1 +186000.0 1 +189000.0 1 +192000.0 1 +195000.0 1 +198000.0 1 +201000.0 1 +204000.0 1 +207000.0 1 +210000.0 1 +213000.0 1 +216000.0 1 +219000.0 1 +222000.0 1 +225000.0 1 +228000.0 1 +231000.0 1 +234000.0 1 +237000.0 1 +240000.0 1 +243000.0 1 +246000.0 1 +249000.0 1 +252000.0 1 +255000.0 1 +258000.0 1 +261000.0 1 +264000.0 1 +267000.0 1 +270000.0 1 +273000.0 1 +276000.0 1 +279000.0 1 +282000.0 1 +285000.0 1 +288000.0 1 +291000.0 1 +294000.0 1 +297000.0 1 +300000.0 1 +303000.0 1 +306000.0 1 +309000.0 1 +312000.0 1 +315000.0 1 +318000.0 1 +321000.0 1 +324000.0 1 +327000.0 1 +330000.0 1 +333000.0 1 +336000.0 1 +339000.0 1 +342000.0 1 +345000.0 1 +348000.0 1 +351000.0 1 +354000.0 1 +357000.0 1 +360000.0 1 +363000.0 1 +366000.0 1 +369000.0 1 +372000.0 1 +375000.0 1 +378000.0 1 +381000.0 1 +384000.0 1 +387000.0 1 +390000.0 1 +393000.0 1 +396000.0 1 +399000.0 1 +402000.0 1 +405000.0 1 +408000.0 1 +411000.0 1 +414000.0 1 +417000.0 1 +420000.0 1 +423000.0 1 +426000.0 1 +429000.0 1 +432000.0 1 +435000.0 1 +438000.0 1 +441000.0 1 +444000.0 1 +447000.0 1 +450000.0 1 +453000.0 1 +456000.0 1 +459000.0 1 +462000.0 1 +465000.0 1 +468000.0 1 +471000.0 1 +474000.0 1 +477000.0 1 +480000.0 1 +483000.0 1 +486000.0 1 +489000.0 1 +492000.0 1 +495000.0 1 +498000.0 1 +501000.0 1 +504000.0 1 +507000.0 1 +510000.0 1 +513000.0 1 +516000.0 1 +519000.0 1 +522000.0 1 +525000.0 1 +528000.0 1 +531000.0 1 +534000.0 1 +537000.0 1 +540000.0 1 +543000.0 1 +546000.0 1 +549000.0 1 +552000.0 1 +555000.0 1 +558000.0 1 +561000.0 1 +564000.0 1 +567000.0 1 +570000.0 1 +573000.0 1 +576000.0 1 +579000.0 1 +582000.0 1 +585000.0 1 +588000.0 1 +591000.0 1 +594000.0 1 +597000.0 1 +600000.0 1 +603000.0 1 +606000.0 1 +609000.0 1 +612000.0 1 +615000.0 1 +618000.0 1 +621000.0 1 +624000.0 1 +627000.0 1 +630000.0 1 +633000.0 1 +636000.0 1 +639000.0 1 +642000.0 1 +645000.0 1 +648000.0 1 +651000.0 1 +654000.0 1 +657000.0 1 +660000.0 1 +663000.0 1 +666000.0 1 +669000.0 1 +672000.0 1 +675000.0 1 +678000.0 1 +681000.0 1 +684000.0 1 +687000.0 1 +690000.0 1 +693000.0 1 +696000.0 1 +699000.0 1 +702000.0 1 +705000.0 1 +708000.0 1 +711000.0 1 +714000.0 1 +717000.0 1 +720000.0 1 +723000.0 1 +726000.0 1 +729000.0 1 +732000.0 1 +735000.0 1 +738000.0 1 +741000.0 1 +744000.0 1 +747000.0 1 +750000.0 1 +753000.0 1 +756000.0 1 +759000.0 1 +762000.0 1 +765000.0 1 +768000.0 1 +771000.0 1 +774000.0 1 +777000.0 1 +780000.0 1 +783000.0 1 +786000.0 1 +789000.0 1 +792000.0 1 +795000.0 1 +798000.0 1 +801000.0 1 +804000.0 1 +807000.0 1 +810000.0 1 +813000.0 1 +816000.0 1 +819000.0 1 +822000.0 1 +825000.0 1 +828000.0 1 +831000.0 1 +834000.0 1 +837000.0 1 +840000.0 1 +843000.0 1 +846000.0 1 +849000.0 1 +852000.0 1 +855000.0 1 +858000.0 1 
+861000.0 1 +864000.0 1 +867000.0 1 +870000.0 1 +873000.0 1 +876000.0 1 +879000.0 1 +882000.0 1 +885000.0 1 +888000.0 1 +891000.0 1 +894000.0 1 +897000.0 1 +PREHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cchar (type: char(5)) + outputColumnNames: cchar + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cchar (type: char(5)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: char(5)) + sort order: + + Map-reduce partition columns: _col0 (type: char(5)) + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: char(5)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: char(5)) + sort order: + + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: char(5)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +1940- 1 +1941- 1 +1942- 1 +1943- 1 +1944- 1 +1945- 1 +1946- 1 +1947- 1 +1948- 1 +1949- 1 +1950- 1 +1951- 1 +1952- 1 +1953- 1 +1954- 1 +1955- 1 +1956- 1 +1957- 1 +1958- 1 +1959- 1 +1960- 1 +1961- 1 +1962- 1 +1963- 1 +1964- 1 +1965- 1 +1966- 1 +1967- 1 +1968- 1 +1969- 1 +1970- 1 +1971- 1 +1972- 1 +1973- 1 +1974- 1 +1975- 1 +1976- 1 +1977- 
1 +1978- 1 +1979- 1 +1980- 1 +1981- 1 +1982- 1 +1983- 1 +1984- 1 +1985- 1 +1986- 1 +1987- 1 +1988- 1 +1989- 1 +1990- 1 +1991- 1 +1992- 1 +1993- 1 +1994- 1 +1995- 1 +1996- 1 +1997- 1 +1998- 1 +1999- 1 +2000- 1 +2001- 1 +2002- 1 +2003- 1 +2004- 1 +2005- 1 +2006- 1 +2007- 1 +2008- 1 +2009- 1 +2010- 1 +2011- 1 +2012- 1 +2013- 1 +2014- 1 +2015- 1 +2016- 1 +2017- 1 +2018- 1 +2019- 1 +2020- 1 +2021- 1 +2022- 1 +2023- 1 +2024- 1 +2025- 1 +2026- 1 +2027- 1 +2028- 1 +2029- 1 +2030- 1 +2031- 1 +2032- 1 +2033- 1 +2034- 1 +2035- 1 +2036- 1 +2037- 1 +2038- 1 +2039- 1 +2040- 1 +2041- 1 +2042- 1 +2043- 1 +2044- 1 +2045- 1 +2046- 1 +2047- 1 +2048- 1 +2049- 1 +2050- 1 +2051- 1 +2052- 1 +2053- 1 +2054- 1 +2055- 1 +2056- 1 +2057- 1 +2058- 1 +2059- 1 +2060- 1 +2061- 1 +2062- 1 +2063- 1 +2064- 1 +2065- 1 +2066- 1 +2067- 1 +2068- 1 +2069- 1 +2070- 1 +2071- 1 +2072- 1 +2073- 1 +2074- 1 +2075- 1 +2076- 1 +2077- 1 +2078- 1 +2079- 1 +2080- 1 +2081- 1 +2082- 1 +2083- 1 +2084- 1 +2085- 1 +2086- 1 +2087- 1 +2088- 1 +2089- 1 +2090- 1 +2091- 1 +2092- 1 +2093- 1 +2094- 1 +2095- 1 +2096- 1 +2097- 1 +2098- 1 +2099- 1 +2100- 1 +2101- 1 +2102- 1 +2103- 1 +2104- 1 +2105- 1 +2106- 1 +2107- 1 +2108- 1 +2109- 1 +2110- 1 +2111- 1 +2112- 1 +2113- 1 +2114- 1 +2115- 1 +2116- 1 +2117- 1 +2118- 1 +2119- 1 +2120- 1 +2121- 1 +2122- 1 +2123- 1 +2124- 1 +2125- 1 +2126- 1 +2127- 1 +2128- 1 +2129- 1 +2130- 1 +2131- 1 +2132- 1 +2133- 1 +2134- 1 +2135- 1 +2136- 1 +2137- 1 +2138- 1 +2139- 1 +2140- 1 +2141- 1 +2142- 1 +2143- 1 +2144- 1 +2145- 1 +2146- 1 +2147- 1 +2148- 1 +2149- 1 +2150- 1 +2151- 1 +2152- 1 +2153- 1 +2154- 1 +2155- 1 +2156- 1 +2157- 1 +2158- 1 +2159- 1 +2160- 1 +2161- 1 +2162- 1 +2163- 1 +2164- 1 +2165- 1 +2166- 1 +2167- 1 +2168- 1 +2169- 1 +2170- 1 +2171- 1 +2172- 1 +2173- 1 +2174- 1 +2175- 1 +2176- 1 +2177- 1 +2178- 1 +2179- 1 +2180- 1 +2181- 1 +2182- 1 +2183- 1 +2184- 1 +2185- 1 +2186- 1 +2187- 1 +2188- 1 +2189- 1 +2190- 1 +2191- 1 +2192- 1 +2193- 1 +2194- 1 +2195- 1 +2196- 1 +2197- 1 +2198- 1 +2199- 1 +2200- 1 +2201- 1 +2202- 1 +2203- 1 +2204- 1 +2205- 1 +2206- 1 +2207- 1 +2208- 1 +2209- 1 +2210- 1 +2211- 1 +2212- 1 +2213- 1 +2214- 1 +2215- 1 +2216- 1 +2217- 1 +2218- 1 +2219- 1 +2220- 1 +2221- 1 +2222- 1 +2223- 1 +2224- 1 +2225- 1 +2226- 1 +2227- 1 +2228- 1 +2229- 1 +2230- 1 +2231- 1 +2232- 1 +2233- 1 +2234- 1 +2235- 1 +2236- 1 +2237- 1 +2238- 1 +2239- 1 +PREHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cvarchar (type: varchar(10)) + outputColumnNames: cvarchar + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cvarchar (type: varchar(10)) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Map-reduce partition columns: _col0 (type: varchar(10)) + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value 
expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: varchar(10)) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: varchar(10)) + sort order: + + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### + 1 +b 1 +ba 1 +bb 1 +bc 1 +bd 1 +be 1 +bf 1 +bg 1 +bh 1 +bi 1 +bj 1 +bk 1 +bl 1 +bm 1 +bn 1 +bo 1 +bp 1 +bq 1 +br 1 +bs 1 +bt 1 +bu 1 +bv 1 +bw 1 +bx 1 +by 1 +bz 1 +c 1 +ca 1 +cb 1 +cc 1 +cd 1 +ce 1 +cf 1 +cg 1 +ch 1 +ci 1 +cj 1 +ck 1 +cl 1 +cm 1 +cn 1 +co 1 +cp 1 +cq 1 +cr 1 +cs 1 +ct 1 +cu 1 +cv 1 +cw 1 +cx 1 +cy 1 +cz 1 +d 1 +da 1 +db 1 +dc 1 +dd 1 +de 1 +df 1 +dg 1 +dh 1 +di 1 +dj 1 +dk 1 +dl 1 +dm 1 +dn 1 +do 1 +dp 1 +dq 1 +dr 1 +ds 1 +dt 1 +du 1 +dv 1 +dw 1 +dx 1 +dy 1 +dz 1 +e 1 +ea 1 +eb 1 +ec 1 +ed 1 +ee 1 +ef 1 +eg 1 +eh 1 +ei 1 +ej 1 +ek 1 +el 1 +em 1 +en 1 +eo 1 +ep 1 +eq 1 +er 1 +es 1 +et 1 +eu 1 +ev 1 +ew 1 +ex 1 +ey 1 +ez 1 +f 1 +fa 1 +fb 1 +fc 1 +fd 1 +fe 1 +ff 1 +fg 1 +fh 1 +fi 1 +fj 1 +fk 1 +fl 1 +fm 1 +fn 1 +fo 1 +fp 1 +fq 1 +fr 1 +fs 1 +ft 1 +fu 1 +fv 1 +fw 1 +fx 1 +fy 1 +fz 1 +g 1 +ga 1 +gb 1 +gc 1 +gd 1 +ge 1 +gf 1 +gg 1 +gh 1 +gi 1 +gj 1 +gk 1 +gl 1 +gm 1 +gn 1 +go 1 +gp 1 +gq 1 +gr 1 +gs 1 +gt 1 +gu 1 +gv 1 +gw 1 +gx 1 +gy 1 +gz 1 +h 1 +ha 1 +hb 1 +hc 1 +hd 1 +he 1 +hf 1 +hg 1 +hh 1 +hi 1 +hj 1 +hk 1 +hl 1 +hm 1 +hn 1 +ho 1 +hp 1 +hq 1 +hr 1 +hs 1 +ht 1 +hu 1 +hv 1 +hw 1 +hx 1 +hy 1 +hz 1 +i 1 +ia 1 +ib 1 +ic 1 +id 1 +ie 1 +if 1 +ig 1 +ih 1 +ii 1 +ij 1 +ik 1 +il 1 +im 1 +in 1 +io 1 +ip 1 +iq 1 +ir 1 +is 1 +it 1 +iu 1 +iv 1 +iw 1 +ix 1 +iy 1 +iz 1 +j 1 +ja 1 +jb 1 +jc 1 +jd 1 +je 1 +jf 1 +jg 1 +jh 1 +ji 1 +jj 1 +jk 1 +jl 1 +jm 1 +jn 1 +jo 1 +jp 1 +jq 1 +jr 1 +js 1 +jt 1 +ju 1 +jv 1 +jw 1 +jx 1 +jy 1 +jz 1 +k 1 +ka 1 +kb 1 +kc 1 +kd 1 +ke 1 +kf 1 +kg 1 +kh 1 +ki 1 +kj 1 +kk 1 +kl 1 +km 1 +kn 1 +ko 1 +kp 1 +kq 1 +kr 1 +ks 1 +kt 1 +ku 1 +kv 1 +kw 1 +kx 1 +ky 1 +kz 1 
+l 1 +la 1 +lb 1 +lc 1 +ld 1 +le 1 +lf 1 +lg 1 +lh 1 +li 1 +lj 1 +lk 1 +ll 1 +lm 1 +ln 1 +m 1 +n 1 +o 1 +p 1 +q 1 +r 1 +s 1 +t 1 +u 1 +v 1 +w 1 +x 1 +y 1 +z 1 +PREHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +PREHOOK: type: QUERY +POSTHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +POSTHOOK: type: QUERY +STAGE DEPENDENCIES: + Stage-1 is a root stage + Stage-2 depends on stages: Stage-1 + Stage-0 depends on stages: Stage-2 + +STAGE PLANS: + Stage: Stage-1 + Map Reduce + Map Operator Tree: + TableScan + alias: parquet_types + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Select Operator + expressions: cstring1 (type: string) + outputColumnNames: cstring1 + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Group By Operator + aggregations: count() + keys: cstring1 (type: string) + mode: hash + outputColumnNames: _col0, _col1 + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Map-reduce partition columns: _col0 (type: string) + Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Group By Operator + aggregations: count(VALUE._col0) + keys: KEY._col0 (type: string) + mode: mergepartial + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe + + Stage: Stage-2 + Map Reduce + Map Operator Tree: + TableScan + Reduce Output Operator + key expressions: _col0 (type: string) + sort order: + + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + value expressions: _col1 (type: bigint) + Execution mode: vectorized + Reduce Operator Tree: + Select Operator + expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint) + outputColumnNames: _col0, _col1 + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + File Output Operator + compressed: false + Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE + table: + input format: org.apache.hadoop.mapred.SequenceFileInputFormat + output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat + serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe + + Stage: Stage-0 + Fetch Operator + limit: -1 + Processor Tree: + ListSink + +PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +PREHOOK: type: QUERY +PREHOOK: Input: default@parquet_types +#### A masked pattern was here #### +POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1 +POSTHOOK: type: QUERY +POSTHOOK: Input: default@parquet_types +#### A masked pattern was here #### +0.3 1 +1.3 1 +10.3 1 +100.3 1 +101.3 1 +102.3 1 +103.3 1 +104.3 1 +105.3 1 +106.3 1 +107.3 1 +108.3 1 +109.3 1 +11.3 1 +110.3 1 +111.3 1 +112.3 1 +113.3 1 +114.3 1 +115.3 1 +116.3 1 +117.3 1 +118.3 1 +119.3 1 +12.3 1 +120.3 1 +121.3 1 +122.3 1 +123.3 1 +124.3 1 +125.3 1 +126.3 1 +127.3 1 +128.3 1 
+129.3 1
+13.3 1
+130.3 1
+131.3 1
+132.3 1
+133.3 1
+134.3 1
+135.3 1
+136.3 1
+137.3 1
+138.3 1
+139.3 1
+14.3 1
+140.3 1
+141.3 1
+142.3 1
+143.3 1
+144.3 1
+145.3 1
+146.3 1
+147.3 1
+148.3 1
+149.3 1
+15.3 1
+150.3 1
+151.3 1
+152.3 1
+153.3 1
+154.3 1
+155.3 1
+156.3 1
+157.3 1
+158.3 1
+159.3 1
+16.3 1
+160.3 1
+161.3 1
+162.3 1
+163.3 1
+164.3 1
+165.3 1
+166.3 1
+167.3 1
+168.3 1
+169.3 1
+17.3 1
+170.3 1
+171.3 1
+172.3 1
+173.3 1
+174.3 1
+175.3 1
+176.3 1
+177.3 1
+178.3 1
+179.3 1
+18.3 1
+180.3 1
+181.3 1
+182.3 1
+183.3 1
+184.3 1
+185.3 1
+186.3 1
+187.3 1
+188.3 1
+189.3 1
+19.3 1
+190.3 1
+191.3 1
+192.3 1
+193.3 1
+194.3 1
+195.3 1
+196.3 1
+197.3 1
+198.3 1
+199.3 1
+2.3 1
+20.3 1
+200.3 1
+201.3 1
+202.3 1
+203.3 1
+204.3 1
+205.3 1
+206.3 1
+207.3 1
+208.3 1
+209.3 1
+21.3 1
+210.3 1
+211.3 1
+212.3 1
+213.3 1
+214.3 1
+215.3 1
+216.3 1
+217.3 1
+218.3 1
+219.3 1
+22.3 1
+220.3 1
+221.3 1
+222.3 1
+223.3 1
+224.3 1
+225.3 1
+226.3 1
+227.3 1
+228.3 1
+229.3 1
+23.3 1
+230.3 1
+231.3 1
+232.3 1
+233.3 1
+234.3 1
+235.3 1
+236.3 1
+237.3 1
+238.3 1
+239.3 1
+24.3 1
+240.3 1
+241.3 1
+242.3 1
+243.3 1
+244.3 1
+245.3 1
+246.3 1
+247.3 1
+248.3 1
+249.3 1
+25.3 1
+250.3 1
+251.3 1
+252.3 1
+253.3 1
+254.3 1
+255.3 1
+256.3 1
+257.3 1
+258.3 1
+259.3 1
+26.3 1
+260.3 1
+261.3 1
+262.3 1
+263.3 1
+264.3 1
+265.3 1
+266.3 1
+267.3 1
+268.3 1
+269.3 1
+27.3 1
+270.3 1
+271.3 1
+272.3 1
+273.3 1
+274.3 1
+275.3 1
+276.3 1
+277.3 1
+278.3 1
+279.3 1
+28.3 1
+280.3 1
+281.3 1
+282.3 1
+283.3 1
+284.3 1
+285.3 1
+286.3 1
+287.3 1
+288.3 1
+289.3 1
+29.3 1
+290.3 1
+291.3 1
+292.3 1
+293.3 1
+294.3 1
+295.3 1
+296.3 1
+297.3 1
+298.3 1
+299.3 1
+3.3 1
+30.3 1
+31.3 1
+32.3 1
+33.3 1
+34.3 1
+35.3 1
+36.3 1
+37.3 1
+38.3 1
+39.3 1
+4.3 1
+40.3 1
+41.3 1
+42.3 1
+43.3 1
+44.3 1
+45.3 1
+46.3 1
+47.3 1
+48.3 1
+49.3 1
+5.3 1
+50.3 1
+51.3 1
+52.3 1
+53.3 1
+54.3 1
+55.3 1
+56.3 1
+57.3 1
+58.3 1
+59.3 1
+6.3 1
+60.3 1
+61.3 1
+62.3 1
+63.3 1
+64.3 1
+65.3 1
+66.3 1
+67.3 1
+68.3 1
+69.3 1
+7.3 1
+70.3 1
+71.3 1
+72.3 1
+73.3 1
+74.3 1
+75.3 1
+76.3 1
+77.3 1
+78.3 1
+79.3 1
+8.3 1
+80.3 1
+81.3 1
+82.3 1
+83.3 1
+84.3 1
+85.3 1
+86.3 1
+87.3 1
+88.3 1
+89.3 1
+9.3 1
+90.3 1
+91.3 1
+92.3 1
+93.3 1
+94.3 1
+95.3 1
+96.3 1
+97.3 1
+98.3 1
+99.3 1
+PREHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cbinary (type: binary)
+              outputColumnNames: cbinary
+              Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: cbinary (type: binary)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: binary)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: binary)
+                  Statistics: Num rows: 300 Data size: 4200 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: binary)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: hex(_col0) (type: string), _col1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 150 Data size: 2100 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+NULL 264
+ 1
+0B 1
+0C 1
+0D 1
+0E 1
+0F 1
+BA 1
+BB 1
+BC 1
+BD 1
+BE 1
+BF 1
+CA 1
+CB 1
+CC 1
+CD 1
+CE 1
+CF 1
+DA 1
+DB 1
+DC 1
+DD 1
+DE 1
+DF 1
+EA 1
+EB 1
+EC 1
+ED 1
+EE 1
+EF 1
+FA 1
+FB 1
+FC 1
+FD 1
+FE 1
+FF 1
diff --git ql/src/test/results/clientpositive/parquet_types_vectorization.q.out ql/src/test/results/clientpositive/parquet_types_vectorization.q.out
new file mode 100644
index 0000000..7818d73
--- /dev/null
+++ ql/src/test/results/clientpositive/parquet_types_vectorization.q.out
@@ -0,0 +1,850 @@
+PREHOOK: query: DROP TABLE parquet_types_staging
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types_staging
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: DROP TABLE parquet_types
+PREHOOK: type: DROPTABLE
+POSTHOOK: query: DROP TABLE parquet_types
+POSTHOOK: type: DROPTABLE
+PREHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary string,
+  m1 map<string,string>,
+  l1 array<int>,
+  st1 struct<c1:int,c2:char(1)>,
+  d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: CREATE TABLE parquet_types_staging (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary string,
+  m1 map<string,string>,
+  l1 array<int>,
+  st1 struct<c1:int,c2:char(1)>,
+  d date
+) ROW FORMAT DELIMITED
+FIELDS TERMINATED BY '|'
+COLLECTION ITEMS TERMINATED BY ','
+MAP KEYS TERMINATED BY ':'
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary binary,
+  m1 map<string,string>,
+  l1 array<int>,
+  st1 struct<c1:int,c2:char(1)>,
+  d date
+) STORED AS PARQUET
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: CREATE TABLE parquet_types (
+  cint int,
+  ctinyint tinyint,
+  csmallint smallint,
+  cfloat float,
+  cdouble double,
+  cstring1 string,
+  t timestamp,
+  cchar char(5),
+  cvarchar varchar(10),
+  cbinary binary,
+  m1 map<string,string>,
+  l1 array<int>,
+  st1 struct<c1:int,c2:char(1)>,
+  d date
+) STORED AS PARQUET
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@parquet_types
+PREHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+PREHOOK: type: LOAD
+#### A masked pattern was here ####
+PREHOOK: Output: default@parquet_types_staging
+POSTHOOK: query: LOAD DATA LOCAL INPATH '../../data/files/parquet_types.txt' OVERWRITE INTO TABLE parquet_types_staging
+POSTHOOK: type: LOAD
+#### A masked pattern was here ####
+POSTHOOK: Output: default@parquet_types_staging
+PREHOOK: query: SELECT * FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT * FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+#### A masked pattern was here ####
+100 1 1 1.0 0.0 abc 2011-01-01 01:01:01.111111111 a a B4F3CAFDBEDD {"k1":"v1"} [101,200] {"c1":10,"c2":"a"} 2011-01-01
+101 2 2 1.1 0.3 def 2012-02-02 02:02:02.222222222 ab ab 68692CCAC0BDE7 {"k2":"v2"} [102,200] {"c1":10,"c2":"d"} 2012-02-02
+102 3 3 1.2 0.6 ghi 2013-03-03 03:03:03.333333333 abc abc B4F3CAFDBEDD {"k3":"v3"} [103,200] {"c1":10,"c2":"g"} 2013-03-03
+103 1 4 1.3 0.9 jkl 2014-04-04 04:04:04.444444444 abcd abcd 68692CCAC0BDE7 {"k4":"v4"} [104,200] {"c1":10,"c2":"j"} 2014-04-04
+104 2 5 1.4 1.2 mno 2015-05-05 05:05:05.555555555 abcde abcde B4F3CAFDBEDD {"k5":"v5"} [105,200] {"c1":10,"c2":"m"} 2015-05-05
+105 3 1 1.0 1.5 pqr 2016-06-06 06:06:06.666666666 abcde abcdef 68692CCAC0BDE7 {"k6":"v6"} [106,200] {"c1":10,"c2":"p"} 2016-06-06
+106 1 2 1.1 1.8 stu 2017-07-07 07:07:07.777777777 abcde abcdefg B4F3CAFDBEDD {"k7":"v7"} [107,200] {"c1":10,"c2":"s"} 2017-07-07
+107 2 3 1.2 2.1 vwx 2018-08-08 08:08:08.888888888 bcdef abcdefgh 68692CCAC0BDE7 {"k8":"v8"} [108,200] {"c1":10,"c2":"v"} 2018-08-08
+108 3 4 1.3 2.4 yza 2019-09-09 09:09:09.999999999 cdefg B4F3CAFDBE 68656C6C6F {"k9":"v9"} [109,200] {"c1":10,"c2":"y"} 2019-09-09
+109 1 5 1.4 2.7 bcd 2020-10-10 10:10:10.101010101 klmno abcdedef 68692CCAC0BDE7 {"k10":"v10"} [110,200] {"c1":10,"c2":"b"} 2020-10-10
+110 2 1 1.0 3.0 efg 2021-11-11 11:11:11.111111111 pqrst abcdede B4F3CAFDBEDD {"k11":"v11"} [111,200] {"c1":10,"c2":"e"} 2021-11-11
+111 3 2 1.1 3.3 hij 2022-12-12 12:12:12.121212121 nopqr abcded 68692CCAC0BDE7 {"k12":"v12"} [112,200] {"c1":10,"c2":"h"} 2022-12-12
+112 1 3 1.2 3.6 klm 2023-01-02 13:13:13.131313131 opqrs abcdd B4F3CAFDBEDD {"k13":"v13"} [113,200] {"c1":10,"c2":"k"} 2023-01-02
+113 2 4 1.3 3.9 nop 2024-02-02 14:14:14.141414141 pqrst abc 68692CCAC0BDE7 {"k14":"v14"} [114,200] {"c1":10,"c2":"n"} 2024-02-02
+114 3 5 1.4 4.2 qrs 2025-03-03 15:15:15.151515151 qrstu b B4F3CAFDBEDD {"k15":"v15"} [115,200] {"c1":10,"c2":"q"} 2025-03-03
+115 1 1 1.0 4.5 qrs 2026-04-04 16:16:16.161616161 rstuv abcded 68692CCAC0BDE7 {"k16":"v16"} [116,200] {"c1":10,"c2":"q"} 2026-04-04
+116 2 2 1.1 4.8 wxy 2027-05-05 17:17:17.171717171 stuvw abcded B4F3CAFDBEDD {"k17":"v17"} [117,200] {"c1":10,"c2":"w"} 2027-05-05
+117 3 3 1.2 5.1 zab 2028-06-06 18:18:18.181818181 tuvwx abcded 68692CCAC0BDE7 {"k18":"v18"} [118,200] {"c1":10,"c2":"z"} 2028-06-06
+118 1 4 1.3 5.4 cde 2029-07-07 19:19:19.191919191 uvwzy abcdede B4F3CAFDBEDD {"k19":"v19"} [119,200] {"c1":10,"c2":"c"} 2029-07-07
+119 2 5 1.4 5.7 fgh 2030-08-08 20:20:20.202020202 vwxyz abcdede 68692CCAC0BDE7 {"k20":"v20"} [120,200] {"c1":10,"c2":"f"} 2030-08-08
+120 3 1 1.0 6.0 ijk 2031-09-09 21:21:21.212121212 wxyza abcde B4F3CAFDBEDD {"k21":"v21"} [121,200] {"c1":10,"c2":"i"} 2031-09-09
+121 1 2 1.1 6.3 lmn 2032-10-10 22:22:22.222222222 bcdef abcde  {"k22":"v22"} [122,200] {"c1":10,"c2":"l"} 2032-10-10
+PREHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types_staging
+PREHOOK: Output: default@parquet_types
+POSTHOOK: query: INSERT OVERWRITE TABLE parquet_types
+SELECT cint, ctinyint, csmallint, cfloat, cdouble, cstring1, t, cchar, cvarchar,
+unhex(cbinary), m1, l1, st1, d FROM parquet_types_staging
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types_staging
+POSTHOOK: Output: default@parquet_types
+POSTHOOK: Lineage: parquet_types.cbinary EXPRESSION [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cbinary, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cchar, type:char(5), comment:null), ]
+POSTHOOK: Lineage: parquet_types.cdouble SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cdouble, type:double, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cfloat SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cfloat, type:float, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cint, type:int, comment:null), ]
+POSTHOOK: Lineage: parquet_types.csmallint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:csmallint, type:smallint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cstring1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cstring1, type:string, comment:null), ]
+POSTHOOK: Lineage: parquet_types.ctinyint SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:ctinyint, type:tinyint, comment:null), ]
+POSTHOOK: Lineage: parquet_types.cvarchar SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:cvarchar, type:varchar(10), comment:null), ]
+POSTHOOK: Lineage: parquet_types.d SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:d, type:date, comment:null), ]
+POSTHOOK: Lineage: parquet_types.l1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:l1, type:array<int>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.m1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:m1, type:map<string,string>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.st1 SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:st1, type:struct<c1:int,c2:char(1)>, comment:null), ]
+POSTHOOK: Lineage: parquet_types.t SIMPLE [(parquet_types_staging)parquet_types_staging.FieldSchema(name:t, type:timestamp, comment:null), ]
+PREHOOK: query: -- test types in group by
+
+EXPLAIN SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+POSTHOOK: query: -- test types in group by
+
+EXPLAIN SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: ctinyint (type: tinyint), cint (type: int), csmallint (type: smallint), cstring1 (type: string), cfloat (type: float), cdouble (type: double)
+              outputColumnNames: ctinyint, cint, csmallint, cstring1, cfloat, cdouble
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: max(cint), min(csmallint), count(cstring1), avg(cfloat), stddev_pop(cdouble)
+                keys: ctinyint (type: tinyint)
+                mode: hash
+                outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: tinyint)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: tinyint)
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4)
+          keys: KEY._col0 (type: tinyint)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: _col0 (type: tinyint), _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), round(_col4, 5) (type: double), round(_col5, 5) (type: double)
+            outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: tinyint)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: double), _col5 (type: double)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: tinyint), VALUE._col0 (type: int), VALUE._col1 (type: smallint), VALUE._col2 (type: bigint), VALUE._col3 (type: double), VALUE._col4 (type: double)
+          outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT ctinyint,
+  MAX(cint),
+  MIN(csmallint),
+  COUNT(cstring1),
+  ROUND(AVG(cfloat), 5),
+  ROUND(STDDEV_POP(cdouble),5)
+FROM parquet_types
+GROUP BY ctinyint
+ORDER BY ctinyint
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+1 121 1 8 1.175 2.06216
+2 119 1 7 1.21429 1.8
+3 120 1 7 1.17143 1.8
+PREHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cfloat (type: float)
+              outputColumnNames: cfloat
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: cfloat (type: float)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: float)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: float)
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: float)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: float)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: float), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cfloat, count(*) FROM parquet_types GROUP BY cfloat ORDER BY cfloat
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+1.0 5
+1.1 5
+1.2 4
+1.3 4
+1.4 4
+PREHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cchar (type: char(5))
+              outputColumnNames: cchar
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: cchar (type: char(5))
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: char(5))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: char(5))
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: char(5))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: char(5))
+              sort order: +
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: char(5)), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cchar, count(*) FROM parquet_types GROUP BY cchar ORDER BY cchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+a 1
+ab 1
+abc 1
+abcd 1
+abcde 3
+bcdef 2
+cdefg 1
+klmno 1
+nopqr 1
+opqrs 1
+pqrst 2
+qrstu 1
+rstuv 1
+stuvw 1
+tuvwx 1
+uvwzy 1
+vwxyz 1
+wxyza 1
+PREHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cvarchar (type: varchar(10))
+              outputColumnNames: cvarchar
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: cvarchar (type: varchar(10))
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: varchar(10))
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: varchar(10))
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: varchar(10))
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: varchar(10))
+              sort order: +
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: varchar(10)), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cvarchar, count(*) FROM parquet_types GROUP BY cvarchar ORDER BY cvarchar
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+B4F3CAFDBE 1
+a 1
+ab 1
+abc 2
+abcd 1
+abcdd 1
+abcde 3
+abcded 4
+abcdede 3
+abcdedef 1
+abcdef 1
+abcdefg 1
+abcdefgh 1
+b 1
+PREHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cstring1 (type: string)
+              outputColumnNames: cstring1
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: cstring1 (type: string)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: string)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: string)
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: string)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: string)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: string), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT cstring1, count(*) FROM parquet_types GROUP BY cstring1 ORDER BY cstring1
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+abc 1
+bcd 1
+cde 1
+def 1
+efg 1
+fgh 1
+ghi 1
+hij 1
+ijk 1
+jkl 1
+klm 1
+lmn 1
+mno 1
+nop 1
+pqr 1
+qrs 2
+stu 1
+vwx 1
+wxy 1
+yza 1
+zab 1
+PREHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-2 depends on stages: Stage-1
+  Stage-0 depends on stages: Stage-2
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: t (type: timestamp)
+              outputColumnNames: t
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: t (type: timestamp)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: timestamp)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: timestamp)
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: timestamp)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazybinary.LazyBinarySerDe
+
+  Stage: Stage-2
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            Reduce Output Operator
+              key expressions: _col0 (type: timestamp)
+              sort order: +
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Select Operator
+          expressions: KEY.reducesinkkey0 (type: timestamp), VALUE._col0 (type: bigint)
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          File Output Operator
+            compressed: false
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            table:
+                input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT t, count(*) FROM parquet_types GROUP BY t ORDER BY t
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+2011-01-01 01:01:01.111111111 1
+2012-02-02 02:02:02.222222222 1
+2013-03-03 03:03:03.333333333 1
+2014-04-04 04:04:04.444444444 1
+2015-05-05 05:05:05.555555555 1
+2016-06-06 06:06:06.666666666 1
+2017-07-07 07:07:07.777777777 1
+2018-08-08 08:08:08.888888888 1
+2019-09-09 09:09:09.999999999 1
+2020-10-10 10:10:10.101010101 1
+2021-11-11 11:11:11.111111111 1
+2022-12-12 12:12:12.121212121 1
+2023-01-02 13:13:13.131313131 1
+2024-02-02 14:14:14.141414141 1
+2025-03-03 15:15:15.151515151 1
+2026-04-04 16:16:16.161616161 1
+2027-05-05 17:17:17.171717171 1
+2028-06-06 18:18:18.181818181 1
+2029-07-07 19:19:19.191919191 1
+2030-08-08 20:20:20.202020202 1
+2031-09-09 21:21:21.212121212 1
+2032-10-10 22:22:22.222222222 1
+PREHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+POSTHOOK: query: EXPLAIN SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+STAGE DEPENDENCIES:
+  Stage-1 is a root stage
+  Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+  Stage: Stage-1
+    Map Reduce
+      Map Operator Tree:
+          TableScan
+            alias: parquet_types
+            Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+            Select Operator
+              expressions: cbinary (type: binary)
+              outputColumnNames: cbinary
+              Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+              Group By Operator
+                aggregations: count()
+                keys: cbinary (type: binary)
+                mode: hash
+                outputColumnNames: _col0, _col1
+                Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                Reduce Output Operator
+                  key expressions: _col0 (type: binary)
+                  sort order: +
+                  Map-reduce partition columns: _col0 (type: binary)
+                  Statistics: Num rows: 22 Data size: 308 Basic stats: COMPLETE Column stats: NONE
+                  value expressions: _col1 (type: bigint)
+      Execution mode: vectorized
+      Reduce Operator Tree:
+        Group By Operator
+          aggregations: count(VALUE._col0)
+          keys: KEY._col0 (type: binary)
+          mode: mergepartial
+          outputColumnNames: _col0, _col1
+          Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+          Select Operator
+            expressions: hex(_col0) (type: string), _col1 (type: bigint)
+            outputColumnNames: _col0, _col1
+            Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+            File Output Operator
+              compressed: false
+              Statistics: Num rows: 11 Data size: 154 Basic stats: COMPLETE Column stats: NONE
+              table:
+                  input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+                  output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+                  serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+  Stage: Stage-0
+    Fetch Operator
+      limit: -1
+      Processor Tree:
+        ListSink
+
+PREHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+PREHOOK: type: QUERY
+PREHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT hex(cbinary), count(*) FROM parquet_types GROUP BY cbinary
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@parquet_types
+#### A masked pattern was here ####
+ 1
+68656C6C6F 1
+68692CCAC0BDE7 10
+B4F3CAFDBEDD 10
diff --git ql/src/test/results/clientpositive/vectorized_parquet_types.q.out ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
index 281fe93..f493102 100644
--- ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
+++ ql/src/test/results/clientpositive/vectorized_parquet_types.q.out
@@ -149,6 +149,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
@@ -216,6 +217,7 @@ STAGE PLANS:
                 input format: org.apache.hadoop.mapred.SequenceFileInputFormat
                 output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
                 serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+      Execution mode: vectorized
 
   Stage: Stage-0
     Fetch Operator
@@ -305,6 +307,7 @@ STAGE PLANS:
                   Map-reduce partition columns: _col0 (type: tinyint)
                   Statistics: Num rows: 22 Data size: 242 Basic stats: COMPLETE Column stats: NONE
                   value expressions: _col1 (type: int), _col2 (type: smallint), _col3 (type: bigint), _col4 (type: struct<count:bigint,sum:double,input:float>), _col5 (type: struct<count:bigint,sum:double,variance:double>), _col6 (type: decimal(4,2))
+      Execution mode: vectorized
       Reduce Operator Tree:
         Group By Operator
           aggregations: max(VALUE._col0), min(VALUE._col1), count(VALUE._col2), avg(VALUE._col3), stddev_pop(VALUE._col4), max(VALUE._col5)